Compare commits

...

42 Commits

Author SHA1 Message Date
Peter Slattery 45b7ae5e2a Languages can supply their own color overrides 2025-08-05 21:18:25 -07:00
Peter Slattery 504d902ddd Make initial parse and code index generation happen async 2025-08-05 15:39:04 -07:00
Peter Slattery 4d356ef7b9 Remove code_index__hash_file, it no longer does anything 2025-08-05 15:26:23 -07:00
Peter Slattery 507f4a9d04 Stop shifting nest lists - they are re-created each frame 2025-08-05 15:25:38 -07:00
Peter Slattery 1507f414b1 Cleaning up temp code, unused struct members, and using a Scratch_Block to store buffer_contents during parsing 2025-08-04 00:51:19 -07:00
Peter Slattery 8ad2b5bbff Incrementally update global_code_index.name_hash 2025-08-04 00:28:09 -07:00
Peter Slattery a92e364e37 Clean up re-parse range identification logic 2025-08-03 15:36:18 -07:00
Peter Slattery 6d77862b78 Collapse process query match logic back into tree_sitter_code_index_update_process_query_match 2025-08-03 00:00:24 -07:00
Peter Slattery 30ac43ce66 Prevent re-parsing the whole file by identifying the node containing the edit, or the node immediately before and after the edit, and parsing only those. Fall back to root for empty file case 2025-08-02 23:21:46 -07:00
Peter Slattery 12322796c2 Incrementally re-parse notes and scope delimiters within the range of a single edit. 2025-08-02 23:21:01 -07:00
Peter Slattery 6d7f1281b2 Allocate Scope_Delims and Notes from free list before allocating in chunks 2025-07-30 17:03:34 -07:00
Peter Slattery 69dc4f8e04 Remove Code_Index_Note_Ptr_Array from the codebase, use Code_Index_Note_List instead 2025-07-30 16:42:27 -07:00
Peter Slattery 76863fc03a Fully remove Code_Index_Nest_Ptr_Array from the codebase, use Code_Index_Nest_List instead 2025-07-30 16:27:50 -07:00
Peter Slattery 1dfc106e8c Parse delimiters before creating the nests tree 2025-07-30 15:38:49 -07:00
Peter Slattery ee082e42d1 tree_sitter_parse_full_file_async no longer updates the code index 2025-07-16 19:43:14 -07:00
Peter Slattery a072229d20 Removing unused function get_token_array_from_buffer_no_wait
- I added this previously, and the need for it has disappeared
2025-07-16 19:41:03 -07:00
Peter Slattery a34a24f4e7 Passing the old and new ranges from buffer_edit_range to Buffer_Tree_Sitter_Data so it can be accessed in the tick for incremental code index updating 2025-07-16 11:31:55 -07:00
Peter Slattery b56bd7caa8 Move code index updating to happen to modified buffers on the tick hook. This will unify the separate paths for updating the code index. 2025-07-16 11:24:23 -07:00
Peter Slattery 94991304b5 Temporarily move all tree-sitter operations to be synchronous 2025-07-16 11:21:41 -07:00
Peter Slattery 3f9b803c62 Move tree-sitter code index updating to an async process to unblock the ui while processing large files 2025-07-14 11:22:39 -07:00
Peter Slattery ae7440aa0b custom_render_buffer only requests the token array if it's being used for a feature the user actually requested since requesting the tokens needs to take the lock on the token array. 2025-07-14 11:22:08 -07:00
Peter Slattery 84b1b15fbb Implement get_token_array_from_buffer_no_wait to return an empty array if the lex task is currently running 2025-07-14 11:19:27 -07:00
Peter Slattery 9ecf49d278 Add typescript support and document how to add a language 2025-07-13 14:37:25 -07:00
Peter Slattery a5f13529c3 Set a per-language flag for enabling virtual indentation 2025-07-13 14:02:32 -07:00
Peter Slattery 5ed8767819 Buffering code index updates when there are many modified buffers within a single frame 2025-07-13 13:46:55 -07:00
Peter Slattery 18428ec90d Pull tree_sitter_code_index_update_single_buffer out as it's own routine, and use it in tree_sitter_parse_asyn__inner to avoid setting the buffer_modified flag 2025-07-13 13:27:21 -07:00
Peter Slattery 5ccd6dd2ab Implemented tree_sitter_code_index_update_tick - populates Code_Index_Nests and Code_Index_Notes from each languages Tags query
Switched tree_sitter_list_all_query_results to list Code_Index_Note's matching a Code_Index_Note_Kind in each file
- remove old query types NESTS, FUNCTIONS, TYPES
2025-07-13 11:19:29 -07:00
Peter Slattery 58374ba625 Incorporating the nest query into the cpp tags query 2025-07-13 11:17:56 -07:00
Peter Slattery 67e0420eb1 Indenting 4coder_code_index.cpp 2025-07-13 11:15:48 -07:00
Peter Slattery 336a98f701 Switched to using the default highlight and tags queries for cpp, jai, bash. 2025-07-13 08:44:45 -07:00
Peter Slattery c5a462993d Pull language registration into individual per-language files 2025-07-12 12:21:31 -07:00
Peter Slattery 60e850b2ff Add bash support 2025-07-12 12:19:23 -07:00
Peter Slattery 877527e918 Cleaning up noisy build logs 2025-07-11 17:58:58 -07:00
Peter Slattery 324a78ae1e Implemented function and type queries.
Implemented tree_sitter_list_all_* commands
2025-07-11 17:18:11 -07:00
Peter Slattery 086ac34c4d Jai syntax highlighting 2025-07-10 12:10:17 -07:00
Peter Slattery 79695eca2c tree sitter logic handles edits to a parsed buffer
- async_task_cancel_nowait implementation
2025-07-10 09:14:35 -07:00
Peter Slattery 43fb4a757a Implement Tree_Sitter_Language_Definition, handle registering languages by extension, and looking up the appropriate language definition for a buffer.
custom_begin_buffer uses new functions to identify which files to treat as code
implement custom_render_buffer which uses tree sitter data to color tokens
2025-07-10 08:53:54 -07:00
Peter Slattery 7caaed736b Initial tree sitter usage:
- identify buffer language
- custom_begin_buffer sets up necessary tree sitter state, and kicks off a parse task
- custom_end_buffer cleans up tree sitter data and kills async parse tasks
- tree_sitter_parse_async/__inner uses tree sitter api to get a tree of the buffer's code and stores it on the buffers managed scope
- tree_sitter_write_tree prints tree to a special *tree* buffer
- use new build scripts in 4coder project
2025-07-10 07:08:27 -07:00
Peter Slattery 1a97b41257 Add tree sitter to compile command 2025-07-10 06:01:48 -07:00
Peter Slattery 8a918eef82 Checking for color support in build scripts 2025-07-10 06:01:34 -07:00
Peter Slattery d615358064 Updated build scripts to use common include flags 2025-07-08 10:05:45 -07:00
Peter Slattery 0b712c50e9 Added tree-sitter to project and got it building as a static library 2025-07-07 20:26:10 -07:00
99 changed files with 2165738 additions and 1485 deletions

2
.gitignore vendored
View File

@ -1,3 +1,4 @@
.DS_Store
build/
build_new/temp
current_dist*/
@ -5,3 +6,4 @@ distributions/
build_stable/
code/generated
code/custom/generated
*.xcodeproj

View File

@ -4,6 +4,8 @@ Welcome to the 4coder community repository.
# Building
TODO - these are outdated
## Windows
1. Setup the MSVC toolchain in your environment, this can be done with the `code/custom/bin/setup_cl_x64.bat` script
2. call the `package.bat` script from the code directory
@ -78,3 +80,23 @@ You need to compile one of those file and run it from the `code` directory.
There is also `code\4ed_api_check.cpp` to verify the generated file but it's not clear at the moment what to check against.
- `code\4ed_generate_keycodes.cpp` is also a bit apart as it generates `code\custom\generated\4coder_event_codes.h`, which contains keyboard key codes and some event hook ids.
# Adding a Language
## Adding the Parser/Scanner to the tree-sitter static library
1. Create a folder named for your language in `non-source/foreign/tree-sitter/lang`.
2. Copy `parser.c` and `scanner.cc` into that folder
3. Add `build_tree_sitter_language "<your_folder_name>" "<Readable Language Name>"` to `build-libs.sh::build_tree_sitter()`
4. Run build-libs.sh and ensure your language built properly.
If there was an error try the following:
- If your error looks like a missing type, it's probably because your scanner/parser were written for a different version of tree-sitter than the one that is linked automatically. Most parsers will have their own tree-sitter directory next to the parser.c file that contains any of parser.h, array.h, and alloc.h. Copy this directory into your language's directory, and change the paths referencing them in your parser/scanner to be local, i.e. change `#include "tree_sitter/parser.h"` to `#include "./tree_sitter/parser.h"`
## Adding the Language to 4coder
1. In `code/custom/languages` create `tree_sitter_<your_language_identifier>.h` by copying `tree_sitter_language_base.h`
2. Import the language into `code/custom/4coder_tree_sitter.cpp` at the top
3. Handle the TODO comments in your new language file
At this point, if there are no compilation errors, you should have syntax highlighting, virtual whitespace support, and basic goto definition support in your new language.

View File

@ -26,9 +26,12 @@ SCRIPTS_DIR="$PROJECT_ROOT/build_new/scripts"
HELPERS_DIR="$PROJECT_ROOT/build_new/helpers"
# Include directories
INCLUDES=(
"$CUSTOM_DIR"
"$FOREIGN_DIR/freetype2"
# TODO: this isn't actually being used - it should be
FCODER_INCLUDES=(
"-I$CUSTOM_DIR"
"-I$CUSTOM_DIR/generated"
# "-I$FOREIGN_DIR/freetype2"
"-I$FOREIGN_DIR/tree-sitter/lib/include"
)
# =============================================================================

View File

@ -1,11 +1,34 @@
#!/bin/bash
# Colors & Styles for output
RED='\033[0;31m'
GREEN='\033[0;32m'
BLUE='\033[0;34m'
BOLD='\033[1m'
NC='\033[0m' # No Color
# Report whether ANSI color escapes should be emitted on stdout.
#
# Returns 0 (colors OK) only when all of the following hold:
#   - stdout is attached to a terminal,
#   - TERM is non-empty and is not "dumb",
#   - if tput is installed, it accepts a foreground-color request.
# Returns 1 otherwise. When tput is not installed, its check is skipped.
colors_supported() {
    # Piped or redirected output never gets colors.
    if [[ ! -t 1 ]]; then
        return 1
    fi

    # An unset/empty TERM or a "dumb" terminal cannot render escapes.
    if [[ -z "$TERM" || "$TERM" == "dumb" ]]; then
        return 1
    fi

    # Only consult tput when it exists; a failing color request means no colors.
    if command -v tput >/dev/null 2>&1 && ! tput setaf 1 >/dev/null 2>&1; then
        return 1
    fi

    return 0
}
# Define the color/style variables used by the print_* helpers.
# They hold backslash escape sequences (not raw control bytes), so they must be
# expanded by something that interprets escapes, e.g. printf's %b.
if colors_supported; then
# Colors & Styles for output
RED='\033[0;31m'
GREEN='\033[0;32m'
BLUE='\033[0;34m'
BOLD='\033[1m'
NC='\033[0m' # No Color
else
# Colors unsupported: define every variable as empty so callers can use them unconditionally.
RED=''
GREEN=''
BLUE=''
BOLD=''
NC=''
fi
print_success() {
printf "%b✓%b %s\n" "$GREEN" "$NC" "$1"

116
build_new/scripts/build-libs.sh Executable file
View File

@ -0,0 +1,116 @@
#!/bin/bash
# =============================================================================
# Configuration
# =============================================================================
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
CONFIG_DIR="$SCRIPT_DIR/../config"
# Source configuration files
source "$CONFIG_DIR/build-config.sh"
source "$HELPERS_DIR/print-routines.sh"
# =============================================================================
# Build Tree Sitter
# =============================================================================
# Compile the tree-sitter parser (and scanner, if present) for one language.
#
# Arguments:
#   $1 - folder name under "$CUSTOM_ROOT/lang" containing parser.c and,
#        optionally, scanner.cc or scanner.c
#   $2 - human-readable language name, used only in log output
#
# Reads CUSTOM_ROOT, TEMP_OUT_DIR, CLANG_OPTS and INCLUDES, which
# build_tree_sitter sets before calling this function.
#
# Outputs "$TEMP_OUT_DIR/<folder>_parser.o" and "$TEMP_OUT_DIR/<folder>_scanner.o"
# for whichever sources exist and compile. Prints a success message if at
# least one object file was built, otherwise a warning.
build_tree_sitter_language() {
    local LANG_DIR=$1
    local LANG_NAME=$2
    local PARSER_SRC="${CUSTOM_ROOT}/lang/$LANG_DIR/parser.c"
    local PARSER_OUT="$TEMP_OUT_DIR/${LANG_DIR}_parser.o"
    local SCANNER_SRC="${CUSTOM_ROOT}/lang/$LANG_DIR/scanner.cc"
    local SCANNER_OUT="$TEMP_OUT_DIR/${LANG_DIR}_scanner.o"
    local BUILT_ANYTHING=0

    # Scanners may be written in C++ (scanner.cc) or C (scanner.c); prefer the former.
    if [ ! -f "$SCANNER_SRC" ]; then
        SCANNER_SRC="${CUSTOM_ROOT}/lang/$LANG_DIR/scanner.c"
    fi

    print_step "Building tree-sitter $LANG_NAME Language Lib ($LANG_DIR)"

    # NOTE: CLANG_OPTS is an array; it must be expanded with [@], otherwise only
    # its first element ("-c") is passed and the remaining flags are dropped.
    if [ -f "$PARSER_SRC" ]; then
        echo " Building Parser..."
        if clang "${CLANG_OPTS[@]}" "${INCLUDES[@]}" "$PARSER_SRC" -o "$PARSER_OUT"; then
            BUILT_ANYTHING=1
        else
            print_warning "Parser failed to compile: $PARSER_SRC"
        fi
    fi

    if [ -f "$SCANNER_SRC" ]; then
        echo " Building Scanner..."
        if clang "${CLANG_OPTS[@]}" "${INCLUDES[@]}" "$SCANNER_SRC" -o "$SCANNER_OUT"; then
            BUILT_ANYTHING=1
        else
            print_warning "Scanner failed to compile: $SCANNER_SRC"
        fi
    fi

    if [ "$BUILT_ANYTHING" -eq 1 ]; then
        print_success "Complete"
    else
        print_warning "Failed to build anything."
    fi
}
# Build the tree-sitter runtime plus every supported language parser and
# archive them into the static library "$BUILD_TEMP_DIR/tree-sitter.a".
#
# Reads FOREIGN_DIR and BUILD_TEMP_DIR from the sourced build configuration.
# Sets the globals CUSTOM_ROOT, INCLUDES, CLANG_OPTS and TEMP_OUT_DIR, which
# build_tree_sitter_language reads.
#
# Returns non-zero if the core library fails to compile or the archive step
# fails. Per-language failures are reported by build_tree_sitter_language and
# do not abort the build.
build_tree_sitter() {
    # NOTE(review): BIN_NAME is not read in this function; kept in case a
    # sourcing script depends on it - confirm before removing.
    BIN_NAME="custom_4coder"
    CUSTOM_ROOT="$FOREIGN_DIR/tree-sitter"
    INCLUDES=(
        "-I$CUSTOM_ROOT/lib/src"
        "-I$CUSTOM_ROOT/lib/include"
    )
    CLANG_OPTS=(
        "-c"  # Compile, don't link
        "-O2" # Compile in release, regardless of 4coder build mode
        "-g"  # Debug info
    )
    TEMP_OUT_DIR="$BUILD_TEMP_DIR/tree-sitter"

    mkdir -p "$TEMP_OUT_DIR"
    # -f: a clean tree has nothing to delete; that must not print errors
    # (or abort a caller running under `set -e`).
    rm -f "$TEMP_OUT_DIR"/*.o
    rm -f "$BUILD_TEMP_DIR/tree-sitter.a"

    # Build tree-sitter.lib/.a
    print_step "Building tree-sitter lib"
    # CLANG_OPTS is an array: expand with [@] so -O2 and -g are actually passed.
    if ! clang "${CLANG_OPTS[@]}" "${INCLUDES[@]}" "$CUSTOM_ROOT/lib/src/lib.c" -o "$TEMP_OUT_DIR/tree-sitter.o"; then
        print_warning "Failed to build tree-sitter lib"
        return 1
    fi
    print_success "Complete"

    build_tree_sitter_language "cpp" "C++"
    build_tree_sitter_language "jai" "Jai"
    build_tree_sitter_language "bash" "Bash"
    build_tree_sitter_language "ts" "Typescript"

    # Link tree-sitter lib and parser obj files into a static library to link into main custom dll
    print_step "Linking tree-sitter static library"
    if ! ar rcs "$BUILD_TEMP_DIR/tree-sitter.a" "$TEMP_OUT_DIR"/*.o; then
        print_warning "Failed to link tree-sitter static library"
        return 1
    fi
    print_success "Completed"
}
# =============================================================================
# Main
# =============================================================================
# Script entry point: announce the build, make sure the build directory
# exists, then build the tree-sitter static library.
#
# Arguments:
#   $1 - build configuration (defaults to "debug")
#   $2 - target architecture (defaults to "x64")
# Both values are only echoed in the log output here.
main() {
    local build_config=${1:-debug}
    local target_arch=${2:-x64}

    print_step "macOS Build Process"
    print_info "Configuration: ${build_config}"
    print_info "Architecture: ${target_arch}"

    # The output directory must exist before any build step runs.
    mkdir -p "${BUILD_DIR}"

    build_tree_sitter
}
# Only run main if script is executed directly (not sourced)
if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
main "$@"
fi

View File

@ -111,24 +111,18 @@ build_core_engine() {
"-DFRED_INTERNAL"
)
# Include directories for custom layer
local include_flags=(
"-I$CUSTOM_DIR"
"-I$CUSTOM_DIR/generated"
)
print_info "Compiling core application library..."
print_info "Input: $app_target"
print_info "Output: $output_lib"
# Compile core application shared library
if [[ "${BUILD_VERBOSE:-}" == "1" ]]; then
echo "Executing: $CXX ${build_flags[*]} ${include_flags[*]} -o $output_lib $app_target ${LINK_LIBS[*]}"
echo "Executing: $CXX ${build_flags[*]} ${FCODER_INCLUDES[*]} -o $output_lib $app_target ${LINK_LIBS[*]}"
fi
$CXX \
"${build_flags[@]}" \
"${include_flags[@]}" \
"${FCODER_INCLUDES[@]}" \
-o "$output_lib" \
"$app_target" \
"${LINK_LIBS[@]}"
@ -185,9 +179,7 @@ build_custom_layer() {
local preprocessed_file="$temp_dir/4coder_default_bindings.i"
$CXX \
-I"$CODE_DIR" \
-I"$CUSTOM_DIR" \
-I"$CUSTOM_DIR/generated" \
"${FCODER_INCLUDES[@]}" \
-DMETA_PASS \
-E \
"$custom_target" \
@ -202,9 +194,7 @@ build_custom_layer() {
$CXX \
"${COMPILE_FLAGS[@]}" \
-I"$CODE_DIR" \
-I"$CUSTOM_DIR" \
-I"$CUSTOM_DIR/generated" \
"${FCODER_INCLUDES[@]}" \
-o "$metadata_generator" \
"$metadata_source"
@ -230,8 +220,7 @@ build_custom_layer() {
"${COMPILE_FLAGS[@]}"
"-shared"
"-fPIC"
"-I$CUSTOM_DIR"
"-I$CUSTOM_DIR/generated"
"${FCODER_INCLUDES[@]}"
"-DFRED_SUPER"
"-DFRED_INTERNAL"
)

View File

@ -1,6 +1,4 @@
#!/bin/bash
# build-macos.sh - macOS-specific build logic using clang
# Handles Objective-C++ files, frameworks, etc.
# Usage: ./build-macos.sh [debug|release] [x64|x86|arm64]
set -e # Exit on error
@ -137,24 +135,18 @@ build_core_engine() {
"-DFRED_INTERNAL"
)
# Include directories for custom layer
local include_flags=(
"-I$CUSTOM_DIR"
"-I$CUSTOM_DIR/generated"
)
print_info "Compiling core application library..."
print_info "Input: $app_target"
print_info "Output: $output_lib"
# Compile core application shared library
if [[ "${BUILD_VERBOSE:-}" == "1" ]]; then
echo "Executing: $CXX ${build_flags[*]} ${include_flags[*]} -o $output_lib $app_target ${LINK_LIBS[*]}"
echo "Executing: $CXX ${build_flags[*]} ${FCODER_INCLUDES[*]} -o $output_lib $app_target ${LINK_LIBS[*]}"
fi
$CXX \
"${build_flags[@]}" \
"${include_flags[@]}" \
"${FCODER_INCLUDES[@]}" \
-o "$output_lib" \
"$app_target" \
"${LINK_LIBS[@]}"
@ -168,15 +160,11 @@ build_platform_layer() {
local platform_source="$CODE_DIR/platform_mac/mac_4ed.mm"
local output_exe="$BUILD_DIR/4ed"
local include_flags=(
"-I$CUSTOM_DIR"
)
# Build flags for executable
local build_flags=(
"${COMPILE_FLAGS[@]}"
"${PLATFORM_INCLUDES[@]}"
"${include_flags[@]}"
"${FCODER_INCLUDES[@]}"
"-DFRED_SUPER"
"-DFRED_INTERNAL"
)
@ -237,10 +225,10 @@ build_custom_layer() {
"${COMPILE_FLAGS[@]}"
"-shared"
"-fPIC"
"-I$CUSTOM_DIR"
"-I$CUSTOM_DIR/generated"
"${FCODER_INCLUDES[@]}"
"-DFRED_SUPER"
"-DFRED_INTERNAL"
"$BUILD_TEMP_DIR/tree-sitter.a"
)
if [[ "${BUILD_VERBOSE:-}" == "1" ]]; then

View File

@ -17,10 +17,10 @@ META_MACROS="-DMETA_PASS"
print_step "Building Metadata"
print_info "Running C Preprocessor"
g++ -I"$CUSTOM_DIR" $META_MACROS $OPTS "$SOURCE" -E -o $PREPROC_FILE
g++ "${FCODER_INCLUDES[@]}" $META_MACROS $OPTS "$SOURCE" -E -o $PREPROC_FILE
print_info "Building Metadata Generator"
g++ -I"$CUSTOM_DIR" $OPTS $METADATA_GEN_SRC -o $METADATA_GEN_DST
g++ "${FCODER_INCLUDES[@]}" $OPTS $METADATA_GEN_SRC -o $METADATA_GEN_DST
print_info "Running Metadata Generator"
$METADATA_GEN_DST -R $CUSTOM_DIR "$PREPROC_FILE"

View File

@ -38,8 +38,7 @@ setup_win32_vars() {
# Base flags for Windows
COMMON_FLAGS=(
"${CLANG_OPTS_WINDOWS[@]}"
"-I$CODE_DIR"
"-I$FOREIGN_DIR/freetype2"
"${FCODER_INCLUDES[@]}"
)
# Architecture-specific flags
@ -161,24 +160,18 @@ build_core_engine() {
"-DFRED_INTERNAL"
)
# Include directories for custom layer
local include_flags=(
"-I$CUSTOM_DIR"
"-I$CUSTOM_DIR/generated"
)
print_info "Compiling core application library..."
print_info "Input: $app_target"
print_info "Output: $output_lib"
# Compile core application shared library
if [[ "${BUILD_VERBOSE:-}" == "1" ]]; then
echo "Executing: $CXX ${build_flags[*]} ${include_flags[*]} -o $output_lib $app_target ${LINK_LIBS[*]}"
echo "Executing: $CXX ${build_flags[*]} ${FCODER_INCLUDES[*]} -o $output_lib $app_target ${LINK_LIBS[*]}"
fi
$CXX \
"${build_flags[@]}" \
"${include_flags[@]}" \
"${FCODER_INCLUDES[@]}" \
-o "$output_lib" \
"$app_target" \
"${LINK_LIBS[@]}"
@ -270,8 +263,7 @@ build_custom_layer() {
local build_flags=(
"${COMPILE_FLAGS[@]}"
"-shared"
"-I$CUSTOM_DIR"
"-I$CUSTOM_DIR/generated"
"${FCODER_INCLUDES[@]}"
"-DFRED_SUPER"
"-DFRED_INTERNAL"
)

View File

@ -19,37 +19,8 @@ HELPERS_DIR="$SCRIPT_DIR/../helpers"
# Source configuration files
source "$CONFIG_DIR/build-config.sh"
source "$HELPERS_DIR/print-routines.sh"
# =============================================================================
# Utility Functions
# =============================================================================
# Colors for output
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color
print_success() {
echo -e "${GREEN}${NC} $1"
}
print_warning() {
echo -e "${YELLOW}${NC} $1"
}
print_error() {
echo -e "${RED}${NC} $1"
}
print_info() {
echo -e "${BLUE}${NC} $1"
}
print_step() {
echo -e "${BLUE}===${NC} $1 ${BLUE}===${NC}"
}
show_usage() {
echo "Usage: $0 [platform] [config] [arch]"

View File

@ -155,7 +155,7 @@ api_type_match(API_Type *a, API_Type *b){
result = true;
}
}break;
case APITypeKind_Enum:
{
if (a->enum_type.val_count == b->enum_type.val_count &&
@ -172,7 +172,7 @@ api_type_match(API_Type *a, API_Type *b){
}
}
}break;
case APITypeKind_Typedef:
{
if (string_match(a->typedef_type.name, b->typedef_type.name) &&
@ -209,9 +209,9 @@ api_get_callable_name(Arena *arena, String_Const_u8 api_name, String_Const_u8 na
function void
generate_api_master_list(Arena *scratch, API_Definition *api, API_Generation_Flag flags, FILE *out, String_Const_u8 generated_by){
fprintf(out, "/* Generated by \"%.*s\" */\n\n", string_expand(generated_by));
for (API_Call *call = api->first_call;
call != 0;
call = call->next){
@ -240,9 +240,9 @@ generate_api_master_list(Arena *scratch, API_Definition *api, API_Generation_Fla
function void
generate_header(Arena *scratch, API_Definition *api, API_Generation_Flag flags, FILE *out, String_Const_u8 generated_by){
fprintf(out, "/* Generated by \"%.*s\" */\n\n", string_expand(generated_by));
for (API_Call *call = api->first_call;
call != 0;
call = call->next){
@ -269,9 +269,9 @@ generate_header(Arena *scratch, API_Definition *api, API_Generation_Flag flags,
}
fprintf(out, ")\n");
}
fprintf(out, "\n");
for (API_Call *call = api->first_call;
call != 0;
call = call->next){
@ -296,9 +296,9 @@ generate_header(Arena *scratch, API_Definition *api, API_Generation_Flag flags,
}
fprintf(out, ");\n");
}
fprintf(out, "\n");
fprintf(out, "struct API_VTable_%.*s{\n", string_expand(api->name));
for (API_Call *call = api->first_call;
call != 0;
@ -311,9 +311,9 @@ generate_header(Arena *scratch, API_Definition *api, API_Generation_Flag flags,
fprintf(out, ";\n");
}
fprintf(out, "};\n");
fprintf(out, "\n");
fprintf(out, "#if defined(STATIC_LINK_API)\n");
for (API_Call *call = api->first_call;
call != 0;
@ -357,9 +357,9 @@ generate_header(Arena *scratch, API_Definition *api, API_Generation_Flag flags,
function void
generate_cpp(Arena *scratch, API_Definition *api, API_Generation_Flag flags, FILE *out, String_Const_u8 generated_by){
fprintf(out, "/* Generated by \"%.*s\" */\n\n", string_expand(generated_by));
fprintf(out, "function void\n");
fprintf(out, "%.*s_api_fill_vtable(API_VTable_%.*s *vtable){\n",
string_expand(api->name),
@ -374,7 +374,7 @@ generate_cpp(Arena *scratch, API_Definition *api, API_Generation_Flag flags, FIL
}
fprintf(out, "}\n");
fprintf(out, "\n");
fprintf(out, "#if defined(DYNAMIC_LINK_API)\n");
fprintf(out, "function void\n");
fprintf(out, "%.*s_api_read_vtable(API_VTable_%.*s *vtable){\n",
@ -395,15 +395,15 @@ generate_cpp(Arena *scratch, API_Definition *api, API_Generation_Flag flags, FIL
function void
generate_constructor(Arena *scratch, API_Definition *api, API_Generation_Flag flags, FILE *out, String_Const_u8 generated_by){
fprintf(out, "/* Generated by \"%.*s\" */\n\n", string_expand(generated_by));
fprintf(out, "function API_Definition*\n");
fprintf(out, "%.*s_api_construct(Arena *arena){\n",
string_expand(api->name));
fprintf(out, " API_Definition *result = begin_api(arena, \"%.*s\");\n",
string_expand(api->name));
for (API_Call *call = api->first_call;
call != 0;
call = call->next){
@ -414,7 +414,7 @@ generate_constructor(Arena *scratch, API_Definition *api, API_Generation_Flag fl
"string_u8_litexpr(\"\"));\n",
string_expand(call->name),
string_expand(call->return_type));
if (call->params.count == 0){
fprintf(out, " (void)call;\n");
}
@ -427,10 +427,10 @@ generate_constructor(Arena *scratch, API_Definition *api, API_Generation_Flag fl
string_expand(param->name));
}
}
fprintf(out, " }\n");
}
fprintf(out, " return(result);\n");
fprintf(out, "}\n");
}
@ -440,15 +440,15 @@ generate_constructor(Arena *scratch, API_Definition *api, API_Generation_Flag fl
function b32
api_definition_generate_api_includes(Arena *arena, API_Definition *api, Generated_Group group, API_Generation_Flag flags, String_Const_u8 generated_by){
// NOTE(allen): Arrange output files
String_Const_u8 path_to_self = string_u8_litexpr(__FILE__);
path_to_self = string_remove_last_folder(path_to_self);
String_Const_u8 fname_ml = {};
String_Const_u8 fname_h = {};
String_Const_u8 fname_cpp = {};
String_Const_u8 fname_con = {};
String_Const_u8 root = {};
switch (group){
case GeneratedGroup_Core:
@ -460,67 +460,69 @@ api_definition_generate_api_includes(Arena *arena, API_Definition *api, Generate
root = string_u8_litexpr("custom/generated/");
}break;
}
fname_ml = push_u8_stringf(arena, "%.*s%.*s%.*s_api_master_list.h",
string_expand(path_to_self),
string_expand(root),
string_expand(api->name));
fname_h = push_u8_stringf(arena, "%.*s%.*s%.*s_api.h",
string_expand(path_to_self),
string_expand(root),
string_expand(api->name));
fname_cpp = push_u8_stringf(arena, "%.*s%.*s%.*s_api.cpp",
string_expand(path_to_self),
string_expand(root),
string_expand(api->name));
fname_con = push_u8_stringf(arena, "%.*s%.*s%.*s_api_constructor.cpp",
string_expand(path_to_self),
string_expand(root),
string_expand(api->name));
FILE *out_file_ml = fopen((char*)fname_ml.str, "wb");
if (out_file_ml == 0){
printf("could not open output file: '%s'\n", fname_ml.str);
return(false);
}
FILE *out_file_h = fopen((char*)fname_h.str, "wb");
if (out_file_h == 0){
printf("could not open output file: '%s'\n", fname_h.str);
return(false);
}
FILE *out_file_cpp = fopen((char*)fname_cpp.str, "wb");
if (out_file_cpp == 0){
printf("could not open output file: '%s'\n", fname_cpp.str);
return(false);
}
FILE *out_file_con = fopen((char*)fname_con.str, "wb");
// NOTE: test the handle that was just opened. Checking out_file_cpp here would
// let a failed open of the constructor file go unnoticed.
if (out_file_con == 0){
    printf("could not open output file: '%s'\n", fname_con.str);
    return(false);
}
#if 0
printf("%s:1:\n", fname_ml.str);
printf("%s:1:\n", fname_h.str);
printf("%s:1:\n", fname_cpp.str);
printf("%s:1:\n", fname_con.str);
#endif
////////////////////////////////
// NOTE(allen): Generate output
generate_api_master_list(arena, api, flags, out_file_ml, generated_by);
generate_header(arena, api, flags, out_file_h, generated_by);
generate_cpp(arena, api, flags, out_file_cpp, generated_by);
generate_constructor(arena, api, flags, out_file_con, generated_by);
////////////////////////////////
fclose(out_file_ml);
fclose(out_file_h);
fclose(out_file_cpp);
@ -587,7 +589,7 @@ api_definition_check(Arena *arena, API_Definition *correct, API_Definition *remo
b32 report_missing = HasFlag(flags, APICheck_ReportMissingAPI);
b32 report_extra = HasFlag(flags, APICheck_ReportExtraAPI);
b32 report_mismatch = HasFlag(flags, APICheck_ReportMismatchAPI);
b32 iterate_correct = (report_missing || report_mismatch);
if (iterate_correct){
for (API_Call *call = correct->first_call;
@ -605,7 +607,7 @@ api_definition_check(Arena *arena, API_Definition *correct, API_Definition *remo
}
}
}
b32 iterate_remote = (report_extra);
if (iterate_remote){
for (API_Call *call = remote->first_call;
@ -625,7 +627,7 @@ function void
api_list_check(Arena *arena, API_Definition_List *correct, API_Definition_List *remote, API_Check_Flag flags, List_String_Const_u8 *error_list){
b32 report_missing = HasFlag(flags, APICheck_ReportMissingAPI);
b32 report_extra = HasFlag(flags, APICheck_ReportExtraAPI);
b32 iterate_correct = (report_missing);
if (iterate_correct){
for (API_Definition *api = correct->first;
@ -638,7 +640,7 @@ api_list_check(Arena *arena, API_Definition_List *correct, API_Definition_List *
}
}
}
b32 iterate_remote = (report_extra);
if (iterate_remote){
for (API_Definition *api = remote->first;
@ -652,7 +654,7 @@ api_list_check(Arena *arena, API_Definition_List *correct, API_Definition_List *
}
}
}
for (API_Definition *api = correct->first;
api != 0;
api = api->next){

View File

@ -257,6 +257,7 @@ get_active_edit_behaviors(Models *models, Editing_File *file){
api(custom) function b32
buffer_replace_range(Application_Links *app, Buffer_ID buffer_id, Range_i64 range, String_Const_u8 string)
{
ProfileScope(app, "buffer_replace_range");
Models *models = (Models*)app->cmd_context;
Editing_File *file = imp_get_file(models, buffer_id);
b32 result = false;
@ -338,7 +339,7 @@ buffer_seek_character_class(Application_Links *app, Buffer_ID buffer, Character_
Scratch_Block scratch(app);
Gap_Buffer *gap_buffer = &file->state.buffer;
List_String_Const_u8 chunks_list = buffer_get_chunks(scratch, gap_buffer);
if (chunks_list.node_count > 0){
// TODO(allen): If you are reading this comment, then I haven't revisited this to tighten it up yet.
// buffer_seek_character_class was originally implemented using the chunk indexer helper
@ -358,7 +359,7 @@ buffer_seek_character_class(Application_Links *app, Buffer_ID buffer, Character_
chunks.vals[chunks.count] = node->string;
chunks.count += 1;
}
i64 size = buffer_size(gap_buffer);
start_pos = clamp(-1, start_pos, size);
Buffer_Chunk_Position pos = buffer_get_chunk_position(chunks, size, start_pos);
@ -734,22 +735,22 @@ buffer_get_setting(Application_Links *app, Buffer_ID buffer_id, Buffer_Setting_I
{
*value_out = file->settings.unimportant;
}break;
case BufferSetting_Unkillable:
{
*value_out = (file->settings.never_kill || file->settings.unkillable);
}break;
case BufferSetting_ReadOnly:
{
*value_out = file->settings.read_only;
}break;
case BufferSetting_RecordsHistory:
{
*value_out = history_is_activated(&file->state.history);
}break;
default:
{
result = false;
@ -777,17 +778,17 @@ buffer_set_setting(Application_Links *app, Buffer_ID buffer_id, Buffer_Setting_I
file_set_unimportant(file, false);
}
}break;
case BufferSetting_Unkillable:
{
file->settings.unkillable = (value != 0);
}break;
case BufferSetting_ReadOnly:
{
file->settings.read_only = (value != 0);
}break;
case BufferSetting_RecordsHistory:
{
if (value){
@ -801,14 +802,14 @@ buffer_set_setting(Application_Links *app, Buffer_ID buffer_id, Buffer_Setting_I
}
}
}break;
default:
{
result = 0;
}break;
}
}
return(result);
}
@ -854,7 +855,7 @@ buffer_save(Application_Links *app, Buffer_ID buffer_id, String_Const_u8 file_na
{
Models *models = (Models*)app->cmd_context;
Editing_File *file = imp_get_file(models, buffer_id);
b32 result = false;
if (api_check_buffer(file)){
b32 skip_save = false;
@ -863,7 +864,7 @@ buffer_save(Application_Links *app, Buffer_ID buffer_id, String_Const_u8 file_na
skip_save = true;
}
}
if (!skip_save){
Thread_Context *tctx = app->tctx;
Scratch_Block scratch(tctx);
@ -872,7 +873,7 @@ buffer_save(Application_Links *app, Buffer_ID buffer_id, String_Const_u8 file_na
result = true;
}
}
return(result);
}
@ -891,16 +892,16 @@ buffer_kill(Application_Links *app, Buffer_ID buffer_id, Buffer_Kill_Flag flags)
if (models->end_buffer != 0){
models->end_buffer(app, file->id);
}
buffer_unbind_name_low_level(working_set, file);
if (file->canon.name_size != 0){
buffer_unbind_file(working_set, file);
}
file_free(tctx, models, file);
working_set_free_file(&models->heap, working_set, file);
Layout *layout = &models->layout;
Node *order = &working_set->touch_order_sentinel;
Node *file_node = order->next;
for (Panel *panel = layout_get_first_open_panel(layout);
@ -919,7 +920,7 @@ buffer_kill(Application_Links *app, Buffer_ID buffer_id, Buffer_Kill_Flag flags)
Assert(file_node != order);
}
}
Child_Process_Container *child_processes = &models->child_processes;
for (Node *node = child_processes->child_process_active_list.next;
node != &child_processes->child_process_active_list;
@ -929,7 +930,7 @@ buffer_kill(Application_Links *app, Buffer_ID buffer_id, Buffer_Kill_Flag flags)
child_process->out_file = 0;
}
}
result = BufferKillResult_Killed;
}
else{
@ -956,20 +957,20 @@ buffer_reopen(Application_Links *app, Buffer_ID buffer_id, Buffer_Reopen_Flag fl
Plat_Handle handle = {};
if (system_load_handle(scratch, (char*)file->canon.name_space, &handle)){
File_Attributes attributes = system_load_attributes(handle);
char *file_memory = push_array(scratch, char, (i32)attributes.size);
if (file_memory != 0){
if (system_load_file(handle, file_memory, (i32)attributes.size)){
system_load_close(handle);
// TODO(allen): try(perform a diff maybe apply edits in reopen)
i32 line_numbers[16];
i32 column_numbers[16];
View *vptrs[16];
i32 vptr_count = 0;
Layout *layout = &models->layout;
for (Panel *panel = layout_get_first_open_panel(layout);
panel != 0;
@ -985,15 +986,15 @@ buffer_reopen(Application_Links *app, Buffer_ID buffer_id, Buffer_Reopen_Flag fl
++vptr_count;
}
}
Working_Set *working_set = &models->working_set;
file_free(tctx, models, file);
working_set_file_default_settings(working_set, file);
file_create_from_string(tctx, models, file, SCu8(file_memory, attributes.size), attributes);
for (i32 i = 0; i < vptr_count; ++i){
view_set_file(tctx, models, vptrs[i], file);
vptrs[i]->file = file;
i64 line = line_numbers[i];
i64 col = column_numbers[i];
@ -1303,13 +1304,13 @@ panel_set_split(Application_Links *app, Panel_ID panel_id, Panel_Split_Kind kind
{
panel->split.v_f32 = clamp(0.f, t, 1.f);
}break;
case PanelSplitKind_FixedPixels_Max:
case PanelSplitKind_FixedPixels_Min:
{
panel->split.v_i32 = i32_round32(t);
}break;
default:
{
print_message(app, string_u8_litexpr("Invalid split kind passed to panel_set_split, no change made to view layout"));
@ -1451,7 +1452,7 @@ view_get_setting(Application_Links *app, View_ID view_id, View_Setting_ID settin
{
Models *models = (Models*)app->cmd_context;
View *view = imp_get_view(models, view_id);
b32 result = false;
if (api_check_view(view)){
result = true;
@ -1460,17 +1461,17 @@ view_get_setting(Application_Links *app, View_ID view_id, View_Setting_ID settin
{
*value_out = view->show_whitespace;
}break;
case ViewSetting_ShowScrollbar:
{
*value_out = !view->hide_scrollbar;
}break;
case ViewSetting_ShowFileBar:
{
*value_out = !view->hide_file_bar;
}break;
default:
{
result = false;
@ -1485,7 +1486,7 @@ view_set_setting(Application_Links *app, View_ID view_id, View_Setting_ID settin
{
Models *models = (Models*)app->cmd_context;
View *view = imp_get_view(models, view_id);
b32 result = false;
if (api_check_view(view)){
result = true;
@ -1494,17 +1495,17 @@ view_set_setting(Application_Links *app, View_ID view_id, View_Setting_ID settin
{
view->show_whitespace = (b8)value;
}break;
case ViewSetting_ShowScrollbar:
{
view->hide_scrollbar = (b8)!value;
}break;
case ViewSetting_ShowFileBar:
{
view->hide_file_bar = (b8)!value;
}break;
default:
{
result = false;
@ -1637,7 +1638,7 @@ view_set_mark(Application_Links *app, View_ID view_id, Buffer_Seek seek)
{
Models *models = (Models*)app->cmd_context;
View *view = imp_get_view(models, view_id);
b32 result = false;
if (api_check_view(view)){
Editing_File *file = view->file;
@ -1831,19 +1832,19 @@ get_managed_scope_with_multiple_dependencies(Application_Links *app, Managed_Sco
{
Models *models = (Models*)app->cmd_context;
Lifetime_Allocator *lifetime_allocator = &models->lifetime_allocator;
Scratch_Block scratch(app);
// TODO(allen): revisit this
struct Node_Ptr{
Node_Ptr *next;
Lifetime_Object *object_ptr;
};
Node_Ptr *first = 0;
Node_Ptr *last = 0;
i32 member_count = 0;
b32 filled_array = true;
for (i32 i = 0; i < count; i += 1){
Dynamic_Workspace *workspace = get_dynamic_workspace(models, scopes[i]);
@ -1851,13 +1852,13 @@ get_managed_scope_with_multiple_dependencies(Application_Links *app, Managed_Sco
filled_array = false;
break;
}
switch (workspace->user_type){
case DynamicWorkspace_Global:
{
// NOTE(allen): (global_scope INTERSECT X) == X for all X, therefore we emit nothing when a global group is in the key list.
}break;
case DynamicWorkspace_Unassociated:
case DynamicWorkspace_Buffer:
case DynamicWorkspace_View:
@ -1869,7 +1870,7 @@ get_managed_scope_with_multiple_dependencies(Application_Links *app, Managed_Sco
new_node->object_ptr = object;
member_count += 1;
}break;
case DynamicWorkspace_Intersected:
{
Lifetime_Key *key = (Lifetime_Key*)workspace->user_back_ptr;
@ -1884,14 +1885,14 @@ get_managed_scope_with_multiple_dependencies(Application_Links *app, Managed_Sco
}
}
}break;
default:
{
InvalidPath;
}break;
}
}
Managed_Scope result = 0;
if (filled_array){
Lifetime_Object **object_ptr_array = push_array(scratch, Lifetime_Object*, member_count);
@ -1906,7 +1907,7 @@ get_managed_scope_with_multiple_dependencies(Application_Links *app, Managed_Sco
Lifetime_Key *key = lifetime_get_or_create_intersection_key(lifetime_allocator, object_ptr_array, member_count);
result = (Managed_Scope)key->dynamic_workspace.scope_id;
}
return(result);
}
@ -2640,7 +2641,7 @@ buffer_set_face(Application_Links *app, Buffer_ID buffer_id, Face_ID id)
{
Models *models = (Models*)app->cmd_context;
Editing_File *file = imp_get_file(models, buffer_id);
b32 did_change = false;
if (api_check_buffer(file)){
Face *face = font_set_face_from_id(&models->font_set, id);
@ -2912,13 +2913,13 @@ text_layout_create(Application_Links *app, Buffer_ID buffer_id, Rect_f32 rect, B
if (api_check_buffer(file)){
Thread_Context *tctx = app->tctx;
Face *face = file_get_face(models, file);
Gap_Buffer *buffer = &file->state.buffer;
Layout_Function *layout_func = file_get_layout_func(file);
Vec2_f32 dim = rect_dim(rect);
i64 line_count = buffer_line_count(buffer);
i64 line_number = buffer_point.line_number;
f32 y = -buffer_point.pixel_shift.y;
@ -2933,13 +2934,13 @@ text_layout_create(Application_Links *app, Buffer_ID buffer_id, Rect_f32 rect, B
}
y = next_y;
}
Range_i64 visible_line_number_range = Ii64(buffer_point.line_number, line_number);
Range_i64 visible_range = Ii64(buffer_get_first_pos_from_line_number(buffer, visible_line_number_range.min),
buffer_get_last_pos_from_line_number(buffer, visible_line_number_range.max));
i64 item_count = range_size_inclusive(visible_range);
Arena arena = make_arena_system();
Arena *arena_ptr = push_array_zero(&arena, Arena, 1);
*arena_ptr = arena;
@ -2992,16 +2993,16 @@ text_layout_line_on_screen(Application_Links *app, Text_Layout_ID layout_id, i64
if (layout == 0){
return(result);
}
Layout_Function *layout_func = layout->layout_func;
Rect_f32 rect = layout->rect;
if (range_contains_inclusive(layout->visible_line_number_range, line_number)){
Editing_File *file = imp_get_file(models, layout->buffer_id);
if (api_check_buffer(file)){
f32 width = rect_width(rect);
Face *face = file_get_face(models, file);
for (i64 line_number_it = layout->visible_line_number_range.first;;
line_number_it += 1){
Layout_Item_List line = file_get_line_layout(app->tctx, models, file,
@ -3013,7 +3014,7 @@ text_layout_line_on_screen(Application_Links *app, Text_Layout_ID layout_id, i64
}
result.min = result.max;
}
result += rect.y0 - layout->point.pixel_shift.y;
}
}
@ -3023,7 +3024,7 @@ text_layout_line_on_screen(Application_Links *app, Text_Layout_ID layout_id, i64
else if (line_number > layout->visible_line_number_range.max){
result = If32(rect.y1, rect.y1);
}
return(result);
}
@ -3037,14 +3038,14 @@ text_layout_character_on_screen(Application_Links *app, Text_Layout_ID layout_id
if (api_check_buffer(file)){
Gap_Buffer *buffer = &file->state.buffer;
i64 line_number = buffer_get_line_index(buffer, pos) + 1;
if (range_contains_inclusive(layout->visible_line_number_range, line_number)){
Rect_f32 rect = layout->rect;
f32 width = rect_width(rect);
Face *face = file_get_face(models, file);
Layout_Function *layout_func = layout->layout_func;
f32 y = 0.f;
Layout_Item_List line = {};
for (i64 line_number_it = layout->visible_line_number_range.first;;
@ -3057,7 +3058,7 @@ text_layout_character_on_screen(Application_Links *app, Text_Layout_ID layout_id
}
y += line.height;
}
// TODO(allen): optimization: This is some fairly heavy computation. We really
// need to accelerate the (pos -> item) lookup within a single
// Buffer_Layout_Item_List.
@ -3081,7 +3082,7 @@ text_layout_character_on_screen(Application_Links *app, Text_Layout_ID layout_id
}
}
}
Vec2_f32 shift = V2f32(rect.x0, rect.y0 + y) - layout->point.pixel_shift;
result.p0 += shift;
result.p1 += shift;

View File

@ -28,7 +28,7 @@ struct Event_Code_List{
Event_Code *first;
Event_Code *last;
i32 count;
String_Const_u8 code_prefix;
String_Const_u8 name_table;
};
@ -39,7 +39,7 @@ function void
generate_codes(Arena *scratch, Event_Code_List *list, FILE *out){
String_Const_u8 code_prefix = list->code_prefix;
String_Const_u8 name_table = list->name_table;
fprintf(out, "enum{\n");
i32 counter = 1;
for (Event_Code *code = list->first;
@ -51,7 +51,7 @@ generate_codes(Arena *scratch, Event_Code_List *list, FILE *out){
}
fprintf(out, " %.*s_COUNT = %d,\n", string_expand(code_prefix), counter);
fprintf(out, "};\n");
fprintf(out, "global char* %.*s[%.*s_COUNT] = {\n",
string_expand(name_table), string_expand(code_prefix));
fprintf(out, " \"None\",\n");
@ -70,7 +70,7 @@ function Event_Code*
add_code(Arena *arena, Event_Code_List *list, String_Const_u8 name){
Event_Code *code = push_array(arena, Event_Code, 1);
sll_queue_push(list->first, list->last, code);
list->count;
list->count += 1;
code->name = push_string_copy(arena, name);
return(code);
}
@ -171,30 +171,30 @@ make_core_list(Arena *arena){
// Entry point of the event-code generator.
// Builds the key, mouse, and core event code lists, then writes the generated
// header to "custom/generated/4coder_event_codes.h", resolved relative to the
// folder returned by string_remove_last_folder(__FILE__).
// Returns 0 on success; prints a message and exits with status 1 if the output
// file cannot be opened.
int
main(void){
    Arena arena = make_arena_malloc();
    
    Event_Code_List key_list = make_key_list(&arena);
    Event_Code_List mouse_list = make_mouse_list(&arena);
    Event_Code_List core_list = make_core_list(&arena);
    
    // Derive the output path from the location of this source file.
    String_Const_u8 path_to_self = string_u8_litexpr(__FILE__);
    path_to_self = string_remove_last_folder(path_to_self);
    String_Const_u8 file_name =
        push_u8_stringf(&arena, "%.*scustom/generated/4coder_event_codes.h",
                        string_expand(path_to_self));
    
    FILE *out = fopen((char*)file_name.str, "wb");
    if (out == 0){
        // %s expects a char*, file_name.str is a u8*, so cast explicitly.
        printf("could not open output file '%s'\n", (char*)file_name.str);
        exit(1);
    }
    
    // __FILE__ is passed as an argument rather than concatenated into the format
    // string, so a '%' in the source path cannot be read as a conversion specifier.
    fprintf(out, "/* Generated by: %s */\n", __FILE__);
    
    generate_codes(&arena, &key_list, out);
    generate_codes(&arena, &mouse_list, out);
    generate_codes(&arena, &core_list, out);
    
    fclose(out);
    return(0);
}

File diff suppressed because it is too large Load Diff

View File

@ -7,17 +7,37 @@
#if !defined(FCODER_CODE_INDEX_H)
#define FCODER_CODE_INDEX_H
// Kind tag for a scope delimiter stored in the code index.
// Three delimiter families (scope, paren, bracket), each with an open and a
// close variant. Values are implicitly numbered from 0 in declaration order,
// so every Open value is immediately followed by its matching Close value.
// NOTE(review): presumably these map to { } ( ) [ ] - confirm against the parser.
typedef i32 Code_Index_Scope_Delim_Kind;
enum {
CodeIndexScopeDelim_ScopeOpen,
CodeIndexScopeDelim_ScopeClose,
CodeIndexScopeDelim_ParenOpen,
CodeIndexScopeDelim_ParenClose,
CodeIndexScopeDelim_BracketOpen,
CodeIndexScopeDelim_BracketClose
};
// One scope delimiter found in a buffer; a node of a doubly linked list
// (see the next/prev links).
struct Code_Index_Scope_Delim {
// Which delimiter this is (open/close of scope, paren, or bracket).
Code_Index_Scope_Delim_Kind kind;
// Nesting depth of this delimiter.
// NOTE(review): whether the outermost level is 0 or 1 is not visible here - confirm.
i32 depth;
// Range occupied by the delimiter (presumably buffer byte positions - confirm).
Range_i64 pos;
// Neighbouring delimiters in the owning list.
Code_Index_Scope_Delim* next;
Code_Index_Scope_Delim* prev;
};
// Head/tail pointers plus element count for a list of Code_Index_Scope_Delim nodes.
struct Code_Index_Scope_Delim_List {
// First and last nodes of the list.
// NOTE(review): presumably both are null when the list is empty - confirm.
Code_Index_Scope_Delim* first;
Code_Index_Scope_Delim* last;
// Number of nodes tracked for the list.
i32 count;
};
// Head/tail pointers plus element count for a list of Code_Index_Nest nodes.
// Code_Index_Nest is forward-declared here via the elaborated `struct` type
// because its full definition appears later in this header.
struct Code_Index_Nest_List{
struct Code_Index_Nest *first;
struct Code_Index_Nest *last;
// Number of nests tracked for the list.
i32 count;
};
// Flat array of pointers to Code_Index_Nest, with its element count.
struct Code_Index_Nest_Ptr_Array{
// Pointer to the first of `count` nest pointers.
struct Code_Index_Nest **ptrs;
i32 count;
};
typedef i32 Code_Index_Nest_Kind;
enum{
CodeIndexNest_Scope,
@ -29,7 +49,10 @@ enum{
struct Code_Index_Nest{
Code_Index_Nest *next;
// TODO(PS): kind and delim are redundant. I don't want to break virtual indentation just yet,
// so I'm leaving them both here.
Code_Index_Nest_Kind kind;
Code_Index_Scope_Delim_Kind delim;
b32 is_closed;
Range_i64 open;
Range_i64 close;
@ -38,7 +61,6 @@ struct Code_Index_Nest{
Code_Index_Nest *parent;
Code_Index_Nest_List nest_list;
Code_Index_Nest_Ptr_Array nest_array;
};
typedef i64 Code_Index_Note_Kind;
@ -67,17 +89,20 @@ struct Code_Index_Note_List{
i32 count;
};
struct Code_Index_Note_Ptr_Array{
Code_Index_Note **ptrs;
struct Code_Index_Note_List_New {
Code_Index_Note sentinel_first;
Code_Index_Note sentinel_last;
i32 count;
};
struct Code_Index_File{
Code_Index_Scope_Delim_List scope_delim_list;
Code_Index_Nest_List nest_list;
Code_Index_Nest_Ptr_Array nest_array;
Code_Index_Note_List note_list;
Code_Index_Note_Ptr_Array note_array;
Buffer_ID buffer;
Code_Index_Scope_Delim* scope_delim_free;
Code_Index_Note* note_free;
String_Pool string_pool;
};
struct Code_Index_File_Storage{
@ -96,7 +121,7 @@ struct Code_Index{
Code_Index_File_Storage *storage_last;
i32 storage_count;
Code_Index_Note_List name_hash[10000];
Code_Index_Note_List_New name_hash[10000];
};
////////////////////////////////

View File

@ -13,24 +13,24 @@ CUSTOM_UI_COMMAND_SIG(jump_to_definition)
CUSTOM_DOC("List all definitions in the code index and jump to one chosen by the user.")
{
char *query = "Definition:";
Scratch_Block scratch(app);
Lister_Block lister(app, scratch);
lister_set_query(lister, query);
lister_set_default_handlers(lister);
code_index_lock();
for (Buffer_ID buffer = get_buffer_next(app, 0, Access_Always);
buffer != 0;
buffer = get_buffer_next(app, buffer, Access_Always)){
Code_Index_File *file = code_index_get_file(buffer);
if (file != 0){
for (i32 i = 0; i < file->note_array.count; i += 1){
Code_Index_Note *note = file->note_array.ptrs[i];
Code_Index_Note* note = file->note_list.first;
for (i32 i = 0; i < file->note_list.count; i += 1, note = note->next){
Tiny_Jump *jump = push_array(scratch, Tiny_Jump, 1);
jump->buffer = buffer;
jump->pos = note->pos.first;
String_Const_u8 sort = {};
switch (note->note_kind){
case CodeIndexNote_Type:
@ -51,13 +51,13 @@ CUSTOM_DOC("List all definitions in the code index and jump to one chosen by the
}
}
code_index_unlock();
Lister_Result l_result = run_lister(app, lister);
Tiny_Jump result = {};
if (!l_result.canceled && l_result.user_data != 0){
block_copy_struct(&result, (Tiny_Jump*)l_result.user_data);
}
if (result.buffer != 0){
View_ID view = get_this_ctx_view(app, Access_Always);
point_stack_push_view_cursor(app, view);
@ -68,31 +68,36 @@ CUSTOM_DOC("List all definitions in the code index and jump to one chosen by the
CUSTOM_UI_COMMAND_SIG(jump_to_definition_at_cursor)
CUSTOM_DOC("Jump to the first definition in the code index matching an identifier at the cursor")
{
View_ID view = get_active_view(app, Access_Visible);
if (view != 0){
Scratch_Block scratch(app);
String_Const_u8 query = push_token_or_word_under_active_cursor(app, scratch);
code_index_lock();
for (Buffer_ID buffer = get_buffer_next(app, 0, Access_Always);
buffer != 0;
buffer = get_buffer_next(app, buffer, Access_Always)){
Code_Index_File *file = code_index_get_file(buffer);
if (file != 0){
for (i32 i = 0; i < file->note_array.count; i += 1){
Code_Index_Note *note = file->note_array.ptrs[i];
if (string_match(note->text, query)){
point_stack_push_view_cursor(app, view);
jump_to_location(app, view, buffer, note->pos.first);
goto done;
}
}
}
View_ID view = get_active_view(app, Access_Visible);
if (view != 0)
{
Scratch_Block scratch(app);
String_Const_u8 query = push_token_or_word_under_active_cursor(app, scratch);
code_index_lock();
for (Buffer_ID buffer = get_buffer_next(app, 0, Access_Always);
buffer != 0;
buffer = get_buffer_next(app, buffer, Access_Always)
){
Code_Index_File *file = code_index_get_file(buffer);
if (file != 0)
{
Code_Index_Note *note = file->note_list.first;
for (i32 i = 0; i < file->note_list.count; i += 1, note = note->next)
{
if (string_match(note->text, query))
{
point_stack_push_view_cursor(app, view);
jump_to_location(app, view, buffer, note->pos.first);
goto done;
}
}
done:;
code_index_unlock();
}
}
done:;
code_index_unlock();
}
}
global String_Const_u8 code_index_note_strs[] = {
@ -122,20 +127,20 @@ lister_add_from_buffer_code_index_filtered(Lister* lister, Buffer_ID buffer, Are
{
Code_Index_File* file_notes = code_index_get_file(buffer);
if (!file_notes) return;
for (Code_Index_Note* note = file_notes->note_list.first;
note != 0;
note = note->next)
{
if (!note_is_of_kind(kinds, kinds_count, note)) continue;
if (filter_all_but_last && note->next_in_hash) continue;
String_Const_u8 sort = code_index_note_strs[note->note_kind];
Tiny_Jump *jump = push_array(scratch, Tiny_Jump, 1);
jump->buffer = buffer;
jump->pos = note->pos.start;
lister_add_item(lister, note->text, sort, jump, 0);
}
}
@ -148,7 +153,7 @@ run_jump_lister(Application_Links* app, Lister* lister)
if (!l_result.canceled && l_result.user_data != 0){
block_copy_struct(&result, (Tiny_Jump*)l_result.user_data);
}
if (result.buffer != 0)
{
View_ID view = get_this_ctx_view(app, Access_Always);
@ -164,7 +169,7 @@ lister_search_filtered(Application_Links* app, char* query, Code_Index_Note_Kind
Lister_Block lister(app, scratch);
lister_set_query(lister, query);
lister_set_default_handlers(lister);
for (Buffer_ID buffer = get_buffer_next(app, 0, Access_Always);
buffer != 0; buffer = get_buffer_next(app, buffer, Access_Always))
{

View File

@ -0,0 +1,466 @@
///////////////////////////////////////////////////////////////////////////
// Begin Buffer
///////////////////////////////////////////////////////////////////////////
// Begin-buffer hook: runs tree-sitter setup for code buffers, stores the key
// map id, line-ending kind and wrap setting on the buffer's managed scope,
// kicks off the async parse and lex tasks, and selects the layout function.
BUFFER_HOOK_SIG(custom_begin_buffer){
    ProfileScope(app, "begin buffer");
    Scratch_Block scratch(app);
    
    // A buffer is treated as code exactly when a tree-sitter language is found for it.
    Tree_Sitter_Language_Definition* language = tree_sitter_language_for_buffer(app, buffer_id);
    bool treat_as_code = (language != 0);
    if (treat_as_code){
        tree_sitter_begin_buffer(app, buffer_id);
    }
    
    // Key map: code buffers get "keys_code", everything else "keys_file".
    String_ID file_map_id = vars_save_string_lit("keys_file");
    String_ID code_map_id = vars_save_string_lit("keys_code");
    Command_Map_ID chosen_map_id = (treat_as_code)?(code_map_id):(file_map_id);
    
    Managed_Scope scope = buffer_get_managed_scope(app, buffer_id);
    Command_Map_ID *map_id_ptr = scope_attachment(app, scope, buffer_map_id, Command_Map_ID);
    *map_id_ptr = chosen_map_id;
    
    // Line endings are guessed from the buffer contents and stored on the scope.
    Line_Ending_Kind guessed_eol = guess_line_ending_kind_from_buffer(app, buffer_id);
    Line_Ending_Kind *eol_setting = scope_attachment(app, scope, buffer_eol_setting, Line_Ending_Kind);
    *eol_setting = guessed_eol;
    
    // NOTE(allen): Decide buffer settings
    b32 wrap_lines = true;
    b32 use_lexer = false;
    if (treat_as_code){
        wrap_lines = def_get_config_b32(vars_save_string_lit("enable_code_wrapping"));
        // TODO(PS): @Remove - consider removing the lexer for now? later, replace in favor of tree-sitter
        use_lexer = true;
        
        // Start the full-file tree-sitter parse in the background.
        Async_Task* parse_task_ptr = scope_attachment(app, scope, buffer_parse_task, Async_Task);
        *parse_task_ptr = async_task_no_dep(&global_async_system, tree_sitter_parse_full_file_async, make_data_struct(&buffer_id));
    }
    
    // Buffers named *like this* take their wrap setting from the output-wrapping option.
    String_Const_u8 buffer_name = push_buffer_base_name(app, scratch, buffer_id);
    b32 is_star_buffer = (buffer_name.size > 0 &&
                          buffer_name.str[0] == '*' &&
                          buffer_name.str[buffer_name.size - 1] == '*');
    if (is_star_buffer){
        wrap_lines = def_get_config_b32(vars_save_string_lit("enable_output_wrapping"));
    }
    
    if (use_lexer){
        ProfileBlock(app, "begin buffer kick off lexer");
        Async_Task *lex_task_ptr = scope_attachment(app, scope, buffer_lex_task, Async_Task);
        *lex_task_ptr = async_task_no_dep(&global_async_system, do_full_lex_async, make_data_struct(&buffer_id));
    }
    
    b32 *wrap_lines_ptr = scope_attachment(app, scope, buffer_wrap_lines, b32);
    *wrap_lines_ptr = wrap_lines;
    
    // Layout: index-based virtual indent when lexing, literal virtual indent for
    // code without a lexer, and the plain generic layout otherwise.
    if (use_lexer){
        buffer_set_layout(app, buffer_id, layout_virt_indent_index_generic);
    }
    else if (treat_as_code){
        buffer_set_layout(app, buffer_id, layout_virt_indent_literal_generic);
    }
    else{
        buffer_set_layout(app, buffer_id, layout_generic);
    }
    
    // no meaning for return
    return(0);
}
///////////////////////////////////////////////////////////////////////////
// End Buffer
///////////////////////////////////////////////////////////////////////////
// End-buffer hook: deletes the buffer's marker list if it has one, then runs
// the tree-sitter teardown followed by the default end-buffer handling.
BUFFER_HOOK_SIG(custom_end_buffer){
    Marker_List *markers = get_marker_list_for_buffer(buffer_id);
    if (markers != 0){
        delete_marker_list(markers);
    }
    
    // TODO(PS): kill the async task for this buffer
    tree_sitter_end_buffer(app, buffer_id);
    default_end_buffer(app, buffer_id);
    
    return(0);
}
///////////////////////////////////////////////////////////////////////////
// Buffer Edit Range
///////////////////////////////////////////////////////////////////////////
// Buffer-edit-range hook: called with the range that was replaced
// (old_cursor_range) and the range the new text occupies (new_range).
// In order, it:
//   1. applies the edit to the buffer's tree-sitter tree and re-parses incrementally,
//   2. shifts fade ranges and the code index entries past the edit,
//   3. incrementally re-lexes the token array, falling back to a full async
//      re-lex when the resync fails or spans too many tokens,
//   4. forwards the edit to loco_on_buffer_edit and marks the buffer modified
//      when the tree-sitter tree was updated.
BUFFER_EDIT_RANGE_SIG(custom_buffer_edit_range){
ProfileScope(app, "custom edit range");
Scratch_Block scratch(app);
Managed_Scope scope = buffer_get_managed_scope(app, buffer_id);
// Position range that was replaced, taken from the pre-edit cursors.
Range_i64 old_range = Ii64(old_cursor_range.min.pos, old_cursor_range.max.pos);
b8 trigger_code_index_update = false;
{ // Tree Sitter
ProfileScope(app, "Tree Sitter Shift");
// NOTE(review): tree_data is dereferenced below without a null check, while the
// Token_Array attachment further down is checked against 0 - confirm whether
// scope_attachment can return null here.
Buffer_Tree_Sitter_Data* tree_data = scope_attachment(app, scope, buffer_tree_sitter_data_id, Buffer_Tree_Sitter_Data);
// TODO(PS): if there's no tree yet, we actually want to block
// if there's an async parse or index task so we don't end up with
// a race condition when we try and incrementally update the tree
// and index
if (tree_data->tree)
{
// Line containing the end of the new text, and the offset of that end
// from the start of its line.
i64 new_end_line = get_line_number_from_pos(app, buffer_id, new_range.end);
i64 new_end_pos = new_range.end - get_line_start_pos(app, buffer_id, new_end_line);
TSInputEdit edit;
edit.start_byte = (u32)old_range.start;
edit.old_end_byte = (u32)old_range.end;
edit.new_end_byte = (u32)new_range.end;
// NOTE(review): start_point and old_end_point pass the cursor line/col through
// unchanged, whereas new_end_point below subtracts 1 from the line and adds 1
// to the in-line offset. Tree-sitter documents TSPoint rows and columns as
// zero-based - confirm which base Buffer_Cursor uses and make the three agree.
edit.start_point = {
(u32)old_cursor_range.start.line,
(u32)old_cursor_range.start.col
};
edit.old_end_point = {
(u32)old_cursor_range.end.line,
(u32)old_cursor_range.end.col
};
// TODO(PS): jack says this works but looks wrong???
edit.new_end_point = {
(u32)new_end_line - 1,
(u32)new_end_pos + 1,
};
// Tell the existing tree about the edit, then re-parse incrementally.
ts_tree_edit(tree_data->tree, &edit);
tree_sitter_parse_incremental(app, buffer_id);
// Remember the edit ranges on the tree data.
tree_data->last_update_old_range = old_range;
tree_data->last_update_new_range = new_range;
trigger_code_index_update = true;
}
}
// Shift fade ranges at/after the old end by the change in end position.
buffer_shift_fade_ranges(buffer_id, old_range.max, (new_range.max - old_range.max));
{
ProfileScope(app, "Code Index Shift");
// The code index is accessed under its lock.
code_index_lock();
Code_Index_File *file = code_index_get_file(buffer_id);
if (file != 0) {
code_index_shift(file, old_range, range_size(new_range));
}
code_index_unlock();
}
// text_shift is computed from the replaced range and the size of the inserted text.
i64 insert_size = range_size(new_range);
i64 text_shift = replace_range_shift(old_range, insert_size);
// TODO(PS): I think we want to replace this with a dumber tokenizer
Async_Task *lex_task_ptr = scope_attachment(app, scope, buffer_lex_task, Async_Task);
Base_Allocator *allocator = managed_scope_allocator(app, scope);
b32 do_full_relex = false;
// If a lex task is running or pending, cancel it and schedule a fresh full lex.
if (async_task_is_running_or_pending(&global_async_system, *lex_task_ptr))
{
async_task_cancel(app, &global_async_system, *lex_task_ptr);
buffer_unmark_as_modified(buffer_id);
do_full_relex = true;
*lex_task_ptr = 0;
}
Token_Array *ptr = scope_attachment(app, scope, attachment_tokens, Token_Array);
if (ptr != 0 && ptr->tokens != 0)
{
ProfileBlockNamed(app, "attempt resync", profile_attempt_resync);
// First token to re-lex and a guess at the token where the old and new
// token streams line up again.
i64 token_index_first = token_relex_first(ptr, old_range.first, 1);
i64 token_index_resync_guess =
token_relex_resync(ptr, old_range.one_past_last, 16);
// Give up on the incremental path when the span covers 4000 or more tokens.
if (token_index_resync_guess - token_index_first >= 4000)
{
do_full_relex = true;
}
else
{
Token *token_first = ptr->tokens + token_index_first;
Token *token_resync = ptr->tokens + token_index_resync_guess;
// Text range to re-lex: the end is the old resync token's end moved by text_shift.
Range_i64 relex_range = Ii64(token_first->pos, token_resync->pos + token_resync->size + text_shift);
String_Const_u8 partial_text = push_buffer_range(app, scratch, buffer_id, relex_range);
Token_List relex_list = lex_full_input_cpp(scratch, partial_text);
// The lexed slice does not reach the end of the buffer, so drop its EOF token.
if (relex_range.one_past_last < buffer_get_size(app, buffer_id))
{
token_drop_eof(&relex_list);
}
Token_Relex relex = token_relex(relex_list, relex_range.first - text_shift, ptr->tokens, token_index_first, token_index_resync_guess);
ProfileCloseNow(profile_attempt_resync);
if (!relex.successful_resync)
{
do_full_relex = true;
}
else
{
ProfileBlock(app, "apply resync");
i64 token_index_resync = relex.first_resync_index;
// Split the old token array into an untouched head, the replaced middle,
// and a tail that only needs its positions shifted.
Range_i64 head = Ii64(0, token_index_first);
Range_i64 replaced = Ii64(token_index_first, token_index_resync);
Range_i64 tail = Ii64(token_index_resync, ptr->count);
i64 resynced_count = (token_index_resync_guess + 1) - token_index_resync;
i64 relexed_count = relex_list.total_count - resynced_count;
i64 tail_shift = relexed_count - (token_index_resync - token_index_first);
i64 new_tokens_count = ptr->count + tail_shift;
Token *new_tokens = base_array(allocator, Token, new_tokens_count);
Token *old_tokens = ptr->tokens;
block_copy_array_shift(new_tokens, old_tokens, head, 0);
token_fill_memory_from_list(new_tokens + replaced.first, &relex_list, relexed_count);
// Relexed token positions are relative to the lexed slice; rebase them to the buffer.
for (i64 i = 0, index = replaced.first; i < relexed_count; i += 1, index += 1)
{
new_tokens[index].pos += relex_range.first;
}
// Tail positions are shifted in the old array first, then copied across.
for (i64 i = tail.first; i < tail.one_past_last; i += 1)
{
old_tokens[i].pos += text_shift;
}
block_copy_array_shift(new_tokens, ptr->tokens, tail, tail_shift);
base_free(allocator, ptr->tokens);
ptr->tokens = new_tokens;
ptr->count = new_tokens_count;
ptr->max = new_tokens_count;
buffer_mark_as_modified(buffer_id);
}
}
}
if (do_full_relex)
{
*lex_task_ptr = async_task_no_dep(&global_async_system, do_full_lex_async,
make_data_struct(&buffer_id));
}
loco_on_buffer_edit(app, buffer_id, old_range, new_range);
// The tree-sitter tree changed above, so mark the buffer as modified.
if (trigger_code_index_update) buffer_mark_as_modified(buffer_id);
// no meaning for return
return(0);
}
///////////////////////////////////////////////////////////////////////////
// Render Buffer
///////////////////////////////////////////////////////////////////////////
// Draws one buffer into `rect` for the given view.
// Order of work: text coloring (tree-sitter or C++ tokens), scope highlight,
// QOL column, error/jump highlights, paren coloring, cursor-line highlight,
// whitespace markers, cursor/mark, fade ranges, and finally the text itself.
// The clip rectangle is set to `rect` for the duration of the call and the
// previous clip is restored before returning.
function void custom_render_buffer(
    Application_Links *app,
    View_ID view_id,
    Face_ID face_id,
    Buffer_ID buffer,
    Text_Layout_ID text_layout_id,
    Rect_f32 rect
){
    ProfileScope(app, "render buffer");
    Scratch_Block scratch(app);
    
    View_ID active_view = get_active_view(app, Access_Always);
    b32 is_active_view = (active_view == view_id);
    Rect_f32 prev_clip = draw_set_clip(app, rect);
    Range_i64 visible_range = text_layout_get_visible_range(app, text_layout_id);
    
    // NOTE(allen): Cursor shape
    Face_Metrics metrics = get_face_metrics(app, face_id);
    u64 cursor_roundness_100 = def_get_config_u64(app, vars_save_string_lit("cursor_roundness"));
    f32 cursor_roundness = metrics.normal_advance*cursor_roundness_100*0.01f;
    f32 mark_thickness = (f32)def_get_config_u64(app, vars_save_string_lit("mark_thickness"));
    
    // NOTE(allen): Token colorizing
    // Zero-initialized on purpose: when tree-sitter coloring is active the array is
    // not filled in here, and the whitespace pass below tests token_array.tokens
    // to decide whether it still needs to fetch the tokens.
    Token_Array token_array = {};
    paint_text_color_fcolor(app, text_layout_id, visible_range, fcolor_id(defcolor_text_default)); // will get overridden by lang-specific token coloring below
    if (use_tree_sitter_token_coloring)
    {
        draw_tree_sitter_node_colors(app, text_layout_id, buffer);
    }
    else
    {
        token_array = get_token_array_from_buffer(app, buffer);
        if (token_array.tokens != 0) {
            draw_cpp_token_colors(app, text_layout_id, &token_array);
        }
    }
    
    i64 cursor_pos = view_correct_cursor(app, view_id);
    view_correct_mark(app, view_id);
    
    // NOTE(allen): Scope highlight
    b32 use_scope_highlight = def_get_config_b32(vars_save_string_lit("use_scope_highlight"));
    if (use_scope_highlight){
        Color_Array colors = finalize_color_array(defcolor_back_cycle);
        draw_scope_highlight(app, buffer, text_layout_id, cursor_pos, colors.vals, colors.count);
    }
    
    // NOTE(PS): QOL Column
    if (qol_col_cursor.pos >= 0){
        Buffer_Seek seek = seek_line_col(qol_col_cursor.line, qol_col_cursor.col);
        Buffer_Cursor cursor = buffer_compute_cursor(app, buffer, seek);
        Rect_f32 col_rect = text_layout_character_on_screen(app, text_layout_id, cursor.pos);
        if (col_rect.x1 > 0.f){
            // Stretch the character rect vertically over the whole buffer rect.
            col_rect.y0 = rect.y0;
            col_rect.y1 = rect.y1;
            draw_rectangle_fcolor(app, col_rect, 0.f, fcolor_id(defcolor_highlight_cursor_line));
        }
    }
    
    b32 use_error_highlight = def_get_config_b32(vars_save_string_lit("use_error_highlight"));
    b32 use_jump_highlight = def_get_config_b32(vars_save_string_lit("use_jump_highlight"));
    if (use_error_highlight || use_jump_highlight){
        // NOTE(allen): Error highlight
        String_Const_u8 name = string_u8_litexpr("*compilation*");
        Buffer_ID compilation_buffer = get_buffer_by_name(app, name, Access_Always);
        if (use_error_highlight){
            draw_jump_highlights(app, buffer, text_layout_id, compilation_buffer,
                                 fcolor_id(defcolor_highlight_junk));
        }
        
        // NOTE(allen): Search highlight
        if (use_jump_highlight){
            Buffer_ID jump_buffer = get_locked_jump_buffer(app);
            // Skip the compilation buffer here; it is handled by the error highlight above.
            if (jump_buffer != compilation_buffer){
                draw_jump_highlights(app, buffer, text_layout_id, jump_buffer,
                                     fcolor_id(defcolor_highlight_white));
            }
        }
    }
    
    // NOTE(allen): Color parens
    b32 use_paren_helper = def_get_config_b32(vars_save_string_lit("use_paren_helper"));
    if (use_paren_helper){
        Color_Array colors = finalize_color_array(defcolor_text_cycle);
        draw_paren_highlight(app, buffer, text_layout_id, cursor_pos, colors.vals, colors.count);
    }
    
    // NOTE(allen): Line highlight
    b32 highlight_line_at_cursor = def_get_config_b32(vars_save_string_lit("highlight_line_at_cursor"));
    if (highlight_line_at_cursor && is_active_view){
        i64 line_number = get_line_number_from_pos(app, buffer, cursor_pos);
        draw_line_highlight(app, text_layout_id, line_number, fcolor_id(defcolor_highlight_cursor_line));
    }
    
    // NOTE(allen): Whitespace highlight
    b64 show_whitespace = false;
    view_get_setting(app, view_id, ViewSetting_ShowWhitespace, &show_whitespace);
    if (show_whitespace)
    {
        // Fetch the tokens lazily if the coloring pass above did not.
        if (token_array.tokens == 0) token_array = get_token_array_from_buffer(app, buffer);
        if (token_array.tokens == 0)
        {
            draw_whitespace_highlight(app, buffer, text_layout_id, cursor_roundness);
        }
        else
        {
            draw_whitespace_highlight(app, text_layout_id, &token_array, cursor_roundness);
        }
    }
    
    // NOTE(allen): Cursor
    switch (fcoder_mode){
        case FCoderMode_Original:
        {
            draw_original_4coder_style_cursor_mark_highlight(app, view_id, is_active_view, buffer, text_layout_id, cursor_roundness, mark_thickness);
        }break;
        case FCoderMode_NotepadLike:
        {
            draw_notepad_style_cursor_highlight(app, view_id, buffer, text_layout_id, cursor_roundness);
        }break;
    }
    
    // NOTE(allen): Fade ranges
    paint_fade_ranges(app, text_layout_id, buffer);
    
    // NOTE(allen): put the actual text on the actual screen
    draw_text_layout_default(app, text_layout_id);
    
    draw_set_clip(app, prev_clip);
}
// Per-view render entry point: draws the view background and margin, carves
// the file bar, query bars, FPS HUD and line-number margin out of the view
// region, applies scroll animation, then renders the buffer into what is left.
function void
custom_render_caller(Application_Links *app, Frame_Info frame_info, View_ID view_id){
    ProfileScope(app, "default render caller");
    
    View_ID active_view = get_active_view(app, Access_Always);
    b32 view_is_active = (active_view == view_id);
    
    Rect_f32 region = draw_background_and_margin(app, view_id, view_is_active);
    Rect_f32 saved_clip = draw_set_clip(app, region);
    
    Buffer_ID buffer = view_get_buffer(app, view_id, Access_Always);
    Face_ID face_id = get_face_id(app, buffer);
    Face_Metrics face_metrics = get_face_metrics(app, face_id);
    
    // NOTE(allen): file bar
    b64 showing_file_bar = false;
    if (view_get_setting(app, view_id, ViewSetting_ShowFileBar, &showing_file_bar)){
        if (showing_file_bar){
            Rect_f32_Pair bar_split = layout_file_bar_on_top(region, face_metrics.line_height);
            draw_file_bar(app, view_id, buffer, face_id, bar_split.min);
            region = bar_split.max;
        }
    }
    
    // Advance the scroll animation; write the new position back only if it moved.
    Buffer_Scroll scroll = view_get_buffer_scroll(app, view_id);
    Buffer_Point_Delta_Result delta = delta_apply(app, view_id,
                                                  frame_info.animation_dt, scroll);
    b32 scroll_unchanged = block_match_struct(&scroll.position, &delta.point);
    if (!scroll_unchanged){
        block_copy_struct(&scroll.position, &delta.point);
        view_set_buffer_scroll(app, view_id, scroll, SetBufferScroll_NoCursorChange);
    }
    if (delta.still_animating){
        animate_in_n_milliseconds(app, 0);
    }
    
    // NOTE(allen): query bars
    region = default_draw_query_bars(app, region, view_id, face_id);
    
    // NOTE(allen): FPS hud
    if (show_fps_hud){
        Rect_f32_Pair hud_split = layout_fps_hud_on_bottom(region, face_metrics.line_height);
        draw_fps_hud(app, frame_info, face_id, hud_split.max);
        region = hud_split.min;
        animate_in_n_milliseconds(app, 1000);
    }
    
    // NOTE(allen): layout line numbers
    b32 show_line_number_margins = def_get_config_b32(vars_save_string_lit("show_line_number_margins"));
    Rect_f32 line_number_rect = {};
    if (show_line_number_margins){
        Rect_f32_Pair margin_split = layout_line_number_margin(app, buffer, region, face_metrics.decimal_digit_advance);
        line_number_rect = margin_split.min;
        region = margin_split.max;
    }
    
    // NOTE(allen): begin buffer render
    Text_Layout_ID text_layout_id = text_layout_create(app, buffer, region, scroll.position);
    
    // NOTE(allen): draw line numbers
    if (show_line_number_margins){
        draw_line_number_margin(app, view_id, buffer, face_id, text_layout_id, line_number_rect);
    }
    
    // NOTE(allen): draw the buffer
    custom_render_buffer(app, view_id, face_id, buffer, text_layout_id, region);
    loco_render_buffer(app, view_id, face_id, buffer, text_layout_id, region, frame_info);
    
    text_layout_free(app, text_layout_id);
    draw_set_clip(app, saved_clip);
}

View File

@ -45,10 +45,10 @@ go_to_definition(Application_Links* app, String_Const_u8 lexeme, View_ID view)
// and then loop
if (string_match(go_to_definition_last_lexeme, lexeme))
{
Code_Index_Note_List* list = code_index__list_from_string(lexeme);
Code_Index_Note_List_New* list = code_index__list_from_string(lexeme);
u64 i = 0;
for (Code_Index_Note *it = list->first;
it != 0;
for (Code_Index_Note *it = list->sentinel_first.next_in_hash;
it != &list->sentinel_last;
it = it->next_in_hash, i++){
if (string_match(lexeme, it->text) && i > go_to_definition_last_lexeme_index){
note = it;
@ -520,28 +520,21 @@ custom_keyboard_bindings()
}
void
custom_layer_init(Application_Links *app){
Thread_Context *tctx = get_thread_context(app);
default_framework_init(app);
custom_layer_init(Application_Links *app)
{
Thread_Context *tctx = get_thread_context(app);
default_framework_init(app);
set_all_default_hooks(app);
modal_init(3, tctx);
set_all_default_hooks(app);
modal_init(3, tctx);
custom_keyboard_bindings();
#if 0
mapping_init(tctx, &framework_mapping);
String_ID global_map_id = vars_save_string_lit("keys_global");
String_ID file_map_id = vars_save_string_lit("keys_file");
String_ID code_map_id = vars_save_string_lit("keys_code");
#if OS_MAC
setup_mac_mapping(&framework_mapping, global_map_id, file_map_id, code_map_id);
#else
setup_default_mapping(&framework_mapping, global_map_id, file_map_id, code_map_id);
#endif
setup_essential_mapping(&framework_mapping, global_map_id, file_map_id, code_map_id);
#endif
set_custom_hook(app, HookID_BeginBuffer, custom_begin_buffer);
set_custom_hook(app, HookID_BufferEditRange, custom_buffer_edit_range);
set_custom_hook(app, HookID_EndBuffer, custom_end_buffer);
set_custom_hook(app, HookID_RenderCaller, custom_render_caller);
custom_keyboard_bindings();
tree_sitter_init(app);
}
#endif //FCODER_DEFAULT_BINDINGS

View File

@ -208,7 +208,9 @@ reload_clean_buffers_on_filesystem_change(Application_Links *app, Frame_Info fra
function void
default_tick(Application_Links *app, Frame_Info frame_info){
code_index_update_tick(app);
if (use_tree_sitter_code_indexing) { tree_sitter_code_index_update_tick(app); }
else { code_index_update_tick(app); }
if (tick_all_fade_ranges(app, frame_info.animation_dt)){
animate_in_n_milliseconds(app, 0);
@ -271,44 +273,6 @@ default_buffer_region(Application_Links *app, View_ID view_id, Rect_f32 region){
return(region);
}
// Paints the open/close delimiters of every nest in `array` that overlaps `range`,
// using one color per nesting depth, then recurses into each nest's children.
//   range   - buffer positions to consider
//   array   - pointers to nests; the early breaks below only make sense if they are
//             ordered by position (NOTE(review): ordering is assumed, confirm with the producer)
//   counter - nesting depth, passed to finalize_color to pick from defcolor_text_cycle
function void
recursive_nest_highlight(Application_Links *app, Text_Layout_ID layout_id, Range_i64 range,
Code_Index_Nest_Ptr_Array *array, i32 counter){
Code_Index_Nest **ptr = array->ptrs;
Code_Index_Nest **ptr_end = ptr + array->count;
// Phase 1: skip leading nests that are closed and whose close delimiter ends
// before range.first. Stop at the first unclosed nest or the first one reaching the range.
for (;ptr < ptr_end; ptr += 1){
Code_Index_Nest *nest = *ptr;
if (!nest->is_closed){
break;
}
if (range.first <= nest->close.max){
break;
}
}
// All nests at this depth share one color.
ARGB_Color argb = finalize_color(defcolor_text_cycle, counter);
// Phase 2: continue from where phase 1 stopped, painting until a nest opens
// at or after range.max.
for (;ptr < ptr_end; ptr += 1){
Code_Index_Nest *nest = *ptr;
if (range.max <= nest->open.min){
break;
}
paint_text_color(app, layout_id, nest->open, argb);
// Unclosed nests have no close range to paint.
if (nest->is_closed){
paint_text_color(app, layout_id, nest->close, argb);
}
// Children are painted with the next depth's color.
recursive_nest_highlight(app, layout_id, range, &nest->nest_array, counter + 1);
}
}
// Entry point for a whole file: highlights the file's top-level nests that
// overlap `range`, starting the color cycle at depth 0.
function void
recursive_nest_highlight(Application_Links *app, Text_Layout_ID layout_id, Range_i64 range,
                         Code_Index_File *file){
    Code_Index_Nest_Ptr_Array *top_level_nests = &file->nest_array;
    i32 root_depth = 0;
    recursive_nest_highlight(app, layout_id, range, top_level_nests, root_depth);
}
function void default_render_buffer(
Application_Links *app,
View_ID view_id,

View File

@ -19,6 +19,7 @@
#include "4coder_table.h"
#include "4coder_events.h"
#include "4coder_types.h"
#include "4coder_string_pool.h"
#include "4coder_doc_content_types.h"
#include "4coder_default_colors.h"
#define DYNAMIC_LINK_API
@ -30,6 +31,7 @@
#include "generated/command_metadata.h"
#endif
#include "4coder_token.h"
#include "generated/lexer_cpp.h"
@ -65,6 +67,7 @@
#include "4coder_search_list.h"
#include "4coder_modal.h"
#include "4coder_qol.h"
#include "4coder_tree_sitter.h"
////////////////////////////////
@ -72,7 +75,7 @@
#include "4coder_stringf.cpp"
#include "4coder_app_links_allocator.cpp"
#include "4coder_system_allocator.cpp"
#include "4coder_string_pool.cpp"
#include "4coder_file.cpp"
#define DYNAMIC_LINK_API
@ -143,10 +146,12 @@
#include "4coder_search_list.cpp"
#include "4coder_modal.cpp"
#include "4coder_yeet.cpp"
#include "4coder_tree_sitter.cpp"
#include "4coder_examples.cpp"
#include "4coder_default_hooks.cpp"
#include "4coder_custom_hooks.cpp"
#include "4coder_qol.cpp"

View File

@ -645,7 +645,7 @@ boundary_token(Application_Links *app, Buffer_ID buffer, Side side, Scan_Directi
}
}
}break;
case Scan_Backward:
{
result = 0;
@ -772,17 +772,17 @@ seek_string(Application_Links *app, Buffer_ID buffer_id, i64 pos, i64 end, i64 m
{
seek_string_forward(app, buffer_id, pos, end, str, result);
}break;
case BufferSeekString_Backward:
{
seek_string_backward(app, buffer_id, pos, min, str, result);
}break;
case BufferSeekString_CaseInsensitive:
{
seek_string_insensitive_forward(app, buffer_id, pos, end, str, result);
}break;
case BufferSeekString_Backward|BufferSeekString_CaseInsensitive:
{
seek_string_insensitive_backward(app, buffer_id, pos, min, str, result);
@ -1248,12 +1248,12 @@ function Indent_Info
get_indent_info_range(Application_Links *app, Buffer_ID buffer, Range_i64 range, i32 tab_width){
Scratch_Block scratch(app);
String_Const_u8 s = push_buffer_range(app, scratch, buffer, range);
Indent_Info info = {};
info.first_char_pos = range.end;
info.is_blank = true;
info.all_space = true;
for (u64 i = 0; i < s.size; i += 1){
u8 c = s.str[i];
if (!character_is_whitespace(c)){
@ -1272,7 +1272,7 @@ get_indent_info_range(Application_Links *app, Buffer_ID buffer, Range_i64 range,
info.indent_pos += tab_width;
}
}
return(info);
}
@ -1329,17 +1329,17 @@ swap_lines(Application_Links *app, Buffer_ID buffer, i64 line_1, i64 line_2){
if (1 <= line_1 && line_2 <= line_count){
Range_i64 range_1 = get_line_pos_range(app, buffer, line_1);
Range_i64 range_2 = get_line_pos_range(app, buffer, line_2);
Scratch_Block scratch(app);
String_Const_u8 text_1 = push_buffer_range(app, scratch, buffer, range_1);
String_Const_u8 text_2 = push_buffer_range(app, scratch, buffer, range_2);
History_Group group = history_group_begin(app, buffer);
buffer_replace_range(app, buffer, range_2, text_1);
buffer_replace_range(app, buffer, range_1, text_2);
history_group_end(group);
i64 shift = replace_range_shift(range_1, text_2.size);
result.min = range_1.min;
result.max = range_2.min + shift;
@ -1505,13 +1505,13 @@ query_user_general(Application_Links *app, Query_Bar *bar, b32 force_number, Str
if (start_query_bar(app, bar, 0) == 0){
return(false);
}
if (init_string.size > 0){
String_u8 string = Su8(bar->string.str, bar->string.size, bar->string_capacity);
string_append(&string, init_string);
bar->string.size = string.string.size;
}
b32 success = true;
for (;;){
User_Input in = get_next_input(app, EventPropertyGroup_Any,
@ -1520,7 +1520,7 @@ query_user_general(Application_Links *app, Query_Bar *bar, b32 force_number, Str
success = false;
break;
}
Scratch_Block scratch(app);
b32 good_insert = false;
String_Const_u8 insert_string = to_writable(&in);
@ -1540,7 +1540,7 @@ query_user_general(Application_Links *app, Query_Bar *bar, b32 force_number, Str
good_insert = true;
}
}
if (in.event.kind == InputEventKind_KeyStroke &&
(in.event.key.code == KeyCode_Return || in.event.key.code == KeyCode_Tab)){
break;
@ -1576,7 +1576,7 @@ query_user_general(Application_Links *app, Query_Bar *bar, b32 force_number, Str
}
}
}
return(success);
}
@ -1668,23 +1668,23 @@ function void
place_begin_and_end_on_own_lines(Application_Links *app, char *begin, char *end){
View_ID view = get_active_view(app, Access_ReadWriteVisible);
Buffer_ID buffer = view_get_buffer(app, view, Access_ReadWriteVisible);
Range_i64 range = get_view_range(app, view);
Range_i64 lines = get_line_range_from_pos_range(app, buffer, range);
range = get_pos_range_from_line_range(app, buffer, lines);
Scratch_Block scratch(app);
b32 min_line_blank = line_is_valid_and_blank(app, buffer, lines.min);
b32 max_line_blank = line_is_valid_and_blank(app, buffer, lines.max);
if ((lines.min < lines.max) || (!min_line_blank)){
String_Const_u8 begin_str = {};
String_Const_u8 end_str = {};
i64 min_adjustment = 0;
i64 max_adjustment = 0;
if (min_line_blank){
begin_str = push_u8_stringf(scratch, "\n%s", begin);
min_adjustment += 1;
@ -1699,15 +1699,15 @@ place_begin_and_end_on_own_lines(Application_Links *app, char *begin, char *end)
end_str = push_u8_stringf(scratch, "\n%s", end);
max_adjustment += 1;
}
max_adjustment += begin_str.size;
Range_i64 new_pos = Ii64(range.min + min_adjustment, range.max + max_adjustment);
History_Group group = history_group_begin(app, buffer);
buffer_replace_range(app, buffer, Ii64(range.min), begin_str);
buffer_replace_range(app, buffer, Ii64(range.max + begin_str.size), end_str);
history_group_end(group);
set_view_range(app, view, new_pos);
}
else{
@ -1833,7 +1833,7 @@ view_disable_highlight_range(Application_Links *app, View_ID view){
function void
view_set_highlight_range(Application_Links *app, View_ID view, Range_i64 range){
view_disable_highlight_range(app, view);
Buffer_ID buffer = view_get_buffer(app, view, Access_Always);
Managed_Scope scope = view_get_managed_scope(app, view);
Managed_Object *highlight = scope_attachment(app, scope, view_highlight_range, Managed_Object);
@ -1853,7 +1853,7 @@ view_look_at_region(Application_Links *app, View_ID view, i64 major_pos, i64 min
if (major_pos == range.max){
bottom_major = true;
}
Buffer_Cursor top = view_compute_cursor(app, view, seek_pos(range.min));
if (top.line > 0){
Buffer_Cursor bottom = view_compute_cursor(app, view, seek_pos(range.max));
@ -1862,16 +1862,16 @@ view_look_at_region(Application_Links *app, View_ID view, i64 major_pos, i64 min
f32 view_height = rect_height(region);
f32 skirt_height = view_height*.1f;
Range_f32 acceptable_y = If32(skirt_height, view_height*.9f);
f32 target_height = view_line_y_difference(app, view, bottom.line + 1, top.line);
f32 line_height = get_view_line_height(app, view);
if (target_height + 2*line_height > view_height){
i64 major_line = bottom.line;
if (range.min == major_pos){
major_line = top.line;
}
Buffer_Scroll scroll = view_get_buffer_scroll(app, view);
scroll.target.line_number = major_line;
scroll.target.pixel_shift.y = -skirt_height;
@ -2357,7 +2357,7 @@ find_nest_side(Application_Links *app, Buffer_ID buffer, i64 pos,
Find_Nest_Flag flags, Scan_Direction scan, Nest_Delimiter_Kind delim,
Range_i64 *out){
b32 result = false;
b32 balanced = HasFlag(flags, FindNest_Balanced);
if (balanced){
if ((delim == NestDelim_Open && scan == Scan_Forward) ||
@ -2365,7 +2365,7 @@ find_nest_side(Application_Links *app, Buffer_ID buffer, i64 pos,
balanced = false;
}
}
Managed_Scope scope = buffer_get_managed_scope(app, buffer);
Token_Array *tokens = scope_attachment(app, scope, attachment_tokens, Token_Array);
if (tokens != 0 && tokens->count > 0){
@ -2374,17 +2374,17 @@ find_nest_side(Application_Links *app, Buffer_ID buffer, i64 pos,
for (;;){
Token *token = token_it_read(&it);
Nest_Delimiter_Kind token_delim = get_nest_delimiter_kind(token->kind, flags);
if (level == 0 && token_delim == delim){
*out = Ii64_size(token->pos, token->size);
result = true;
break;
}
if (balanced && token_delim != NestDelim_None){
level += (token_delim == delim)?-1:1;
}
b32 good = false;
if (scan == Scan_Forward){
good = token_it_inc(&it);
@ -2397,7 +2397,7 @@ find_nest_side(Application_Links *app, Buffer_ID buffer, i64 pos,
}
}
}
return(result);
}
@ -2476,7 +2476,7 @@ set_buffer_system_command(Application_Links *app, Child_Process_ID process, Buff
clear_buffer(app, buffer);
if (HasFlag(flags, CLI_SendEndSignal)){
buffer_send_end_signal(app, buffer);
Buffer_Hook_Function *begin_buffer = (Buffer_Hook_Function*)get_custom_hook(app, HookID_BeginBuffer);
if (begin_buffer != 0){
begin_buffer(app, buffer);

View File

@ -0,0 +1,129 @@
// Resets the pool's free list to "empty": the head and tail sentinels point at
// each other with no real free blocks between them.
// NOTE(review): `buffers`, `last_buffer_size` and the sentinels' remaining fields
// are not touched here, so the pool is presumably expected to be zero-initialized
// before this call. Confirm with the callers.
function void
string_pool_init(String_Pool* pool)
{
    String_Pool_Free_List* head = &pool->free_first;
    String_Pool_Free_List* tail = &pool->free_last;
    tail->prev = head;
    head->next = tail;
}
// Returns the memory described by `str` to the pool's free list and coalesces it
// with adjacent free blocks.
//
// The free list is kept sorted by address between the two sentinels
// (pool->free_first / pool->free_last). The free-list header is written directly
// into the freed memory, so the caller must own at least
// sizeof(String_Pool_Free_List) bytes at str.str.
//
// Returns the free-list node that now contains the freed memory. This is the
// preceding node if the freed block was merged into it.
function String_Pool_Free_List*
free_string_inner(String_Pool* pool, String_Const_u8 str)
{
    String_Pool_Free_List* head = &pool->free_first;
    String_Pool_Free_List* tail = &pool->free_last;

    String_Pool_Free_List* free_at = (String_Pool_Free_List*)str.str;
    free_at->next = 0;
    free_at->prev = 0;
    free_at->size = str.size;

    // Find the last node located below the freed block. This is the head
    // sentinel when no real node precedes it.
    String_Pool_Free_List* prev = head;
    for (String_Pool_Free_List* at = head->next; at != tail; at = at->next)
    {
        if ((u8*)at < (u8*)free_at) prev = at;
        else break;
    }

    // Link the freed block in right after `prev`.
    String_Pool_Free_List* next = prev->next;
    prev->next = free_at;
    free_at->prev = prev;
    free_at->next = next;
    next->prev = free_at;

    // If the preceding real block ends exactly where the freed block starts,
    // continue from it. The forward merge below then absorbs the freed block.
    // The head sentinel is never a merge candidate.
    if (prev != head && (u8*)prev + prev->size == (u8*)free_at)
    {
        free_at = prev;
    }

    // Absorb every directly adjacent following block. The tail sentinel is not
    // pool memory and has no successor, so it must never be merged.
    while (free_at->next != tail &&
           (u8*)free_at + free_at->size == (u8*)free_at->next)
    {
        String_Pool_Free_List* absorbed = free_at->next;
        free_at->size += absorbed->size;
        absorbed->next->prev = free_at;
        free_at->next = absorbed->next;
    }

    return free_at;
}
// Grows the pool by one backing buffer taken from `backing_arena` and hands the
// buffer's payload to the free list.
//
// Buffer sizes double on every call, starting at 4KB. The size keeps doubling
// until the payload (buffer minus the String_Pool_Buffer header stored at its
// start) can hold `size_provided` bytes, so a single oversized request still succeeds.
//
// Returns the free-list node containing the new memory. Its size is at least
// `size_provided`.
function String_Pool_Free_List*
string_pool_push_buffer(String_Pool* pool, int size_provided, Arena* backing_arena)
{
    u64 header_size = sizeof(String_Pool_Buffer);
    u64 payload_needed = (size_provided > 0) ? (u64)size_provided : 0;

    u64 next_buffer_size = pool->last_buffer_size * 2;
    if (next_buffer_size == 0) next_buffer_size = KB(4);
    // Make sure the request fits after the header has been carved off.
    while (next_buffer_size < header_size + payload_needed) next_buffer_size *= 2;

    String_Const_u8 buffer_data = string_const_u8_push(backing_arena, next_buffer_size);
    pool->last_buffer_size = next_buffer_size;

    // The buffer header lives at the start of the allocation. The rest is payload.
    String_Pool_Buffer* buffer = (String_Pool_Buffer*)buffer_data.str;
    buffer_data.str += header_size;
    buffer_data.size -= header_size;
    buffer->data = buffer_data;

    // Push onto the pool's singly linked list of buffers.
    buffer->next = pool->buffers;
    pool->buffers = buffer;

    return free_string_inner(pool, buffer_data);
}
// Allocates a zeroed string of at least `size_provided` bytes from the pool.
//
// The request is rounded up to a multiple of STRING_POOL_ALLOC_SIZE and served
// first-fit from the free list. When no free block is large enough, a new
// backing buffer is pushed from `backing_arena`.
//
// Returns a string whose `size` is the rounded-up allocation size.
function String_Const_u8
alloc_string(String_Pool* pool, int size_provided, Arena* backing_arena)
{
    int size = ((size_provided + STRING_POOL_ALLOC_SIZE - 1) / STRING_POOL_ALLOC_SIZE) * STRING_POOL_ALLOC_SIZE;

    // First fit: take the first free block that can hold the request.
    String_Pool_Free_List* block = 0;
    for (String_Pool_Free_List* it = pool->free_first.next;
         it != &pool->free_last;
         it = it->next)
    {
        if (it->size >= size)
        {
            block = it;
            break;
        }
    }
    if (block == 0)
    {
        block = string_pool_push_buffer(pool, size, backing_arena);
    }
    Assert(block->size >= size);

    // Capture the neighbours and the leftover before the block's header is overwritten.
    String_Pool_Free_List* before = block->prev;
    String_Pool_Free_List* after = block->next;
    i64 leftover = block->size - size;

    String_Const_u8 result;
    result.str = (u8*)block;
    result.size = size;

    if (leftover > 0)
    {
        // Split: the tail of the block stays in the free list in the block's place.
        String_Pool_Free_List* remainder = (String_Pool_Free_List*)((u8*)block + size);
        remainder->size = leftover;
        remainder->prev = before;
        remainder->next = after;
        before->next = remainder;
        after->prev = remainder;
    }
    else
    {
        // Exact fit: unlink the block entirely.
        before->next = after;
        after->prev = before;
    }

    block_zero(result.str, result.size);
    return result;
}
// Allocates a pool string holding a copy of `src` followed by a null terminator.
// The returned `size` is src.size (the terminator is not counted), which is
// smaller than the number of bytes actually reserved in the pool.
function String_Const_u8
alloc_string_copy(String_Pool* pool, String_Const_u8 src, Arena* backing_arena)
{
    // One extra byte for the terminator.
    String_Const_u8 copy = alloc_string(pool, src.size+1, backing_arena);
    block_copy_dynamic_array(copy.str, src.str, src.size);
    copy.str[src.size] = 0;
    copy.size = src.size;
    return copy;
}
// Returns a string obtained from alloc_string / alloc_string_copy to the pool.
//
// alloc_string hands out memory in multiples of STRING_POOL_ALLOC_SIZE, but
// alloc_string_copy reports the logical length in `size`. The size is therefore
// rounded back up to the allocation granularity before the block is put on the
// free list, so the tail of the allocation is not leaked. The rounded size never
// exceeds what was originally reserved for the string.
//
// Null and zero-size strings are ignored. A zero-size string may not own any pool
// memory, and writing a free-list header into it could corrupt a live free node.
function void
free_string(String_Pool* pool, String_Const_u8 str)
{
    if (str.str == 0 || str.size == 0) return;

    u64 granule = STRING_POOL_ALLOC_SIZE;
    str.size = ((str.size + granule - 1) / granule) * granule;
    free_string_inner(pool, str);
}

View File

@ -0,0 +1,29 @@
/* date = August 3rd 2025 5:01 pm */
#ifndef FCODER_STRING_POOL_H
#define FCODER_STRING_POOL_H
// Allocation granularity in bytes: alloc_string rounds every request up to a
// multiple of this value.
#define STRING_POOL_ALLOC_SIZE 64
// Header of a free block. It is stored in place at the start of the free memory
// it describes, and also used for the two list sentinels inside String_Pool.
struct String_Pool_Free_List
{
// Size in bytes of the free block, counted from the start of this header.
i64 size;
// Doubly linked list links. Real blocks are kept in address order between the sentinels.
String_Pool_Free_List* next;
String_Pool_Free_List* prev;
};
// Header stored at the start of each backing buffer the pool takes from its arena.
struct String_Pool_Buffer
{
// Usable payload of the buffer: the memory following this header.
String_Const_u8 data;
// Next (older) buffer. New buffers are pushed at the front of the list.
String_Pool_Buffer* next;
};
// Free-list string allocator that grows by taking buffers from a backing arena.
struct String_Pool
{
// Singly linked list of all backing buffers, most recent first.
String_Pool_Buffer* buffers;
// Head and tail sentinels of the free list. string_pool_init links them to each other.
String_Pool_Free_List free_first;
String_Pool_Free_List free_last;
// Size of the most recently pushed buffer. The next buffer is twice this size
// (4KB when this is still 0).
u64 last_buffer_size;
};
#endif //FCODER_STRING_POOL_H

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,140 @@
/* date = July 8th 2025 10:13 am */
#ifndef FCODER_TREE_SITTER_H
#define FCODER_TREE_SITTER_H
#include <tree_sitter/api.h>
// Kinds of tree-sitter query a language can supply. The values index
// Tree_Sitter_Language_Queries::ptr.
enum Tree_Sitter_Language_Query_Kind
{
Tree_Sitter_Language_Query_Highlights,
Tree_Sitter_Language_Query_Tags,
// Number of query kinds. Used as the array length, not a real kind.
Tree_Sitter_Language_Query_Count,
};
// Queries of one language, indexed by Tree_Sitter_Language_Query_Kind.
// Registration code zero-initializes this struct, so a kind the language does
// not provide is left as a null pointer.
struct Tree_Sitter_Language_Queries
{
TSQuery* ptr[Tree_Sitter_Language_Query_Count];
};
// Bit flags describing per-language capabilities.
typedef u32 Tree_Sitter_Language_Flags;
enum
{
// Set for languages that can make use of virtual indentation.
Tree_Sitter_Language_Can_Receive_Virtual_Indent = 0x1
};
// Associates one query capture name (e.g. "keyword") with a color id.
struct Tree_Sitter_Query_Color_Map
{
// NOTE(review): presumably a hash of capture_name used for lookup. Confirm in 4coder_tree_sitter.cpp.
u64 hash;
String_Const_u8 capture_name;
Managed_ID color_id;
};
// Everything registered for one file extension: grammar, queries, flags and colors.
struct Tree_Sitter_Language_Definition
{
// File extension without the dot, as passed to tree_sitter_register_language (e.g. "cpp").
String_Const_u8 extension;
// NOTE(review): presumably a hash of `extension`. Confirm in 4coder_tree_sitter.cpp.
u64 extension_hash;
TSLanguage* language;
Tree_Sitter_Language_Queries queries;
Tree_Sitter_Language_Flags flags;
// Fixed-capacity table of capture-name -> color mappings for this language.
Tree_Sitter_Query_Color_Map colors[1000];
// NOTE(review): presumably chains definitions that share a slot in
// Tree_Sitter_Languages::languages. Confirm.
Tree_Sitter_Language_Definition* next;
};
// Registry of all language definitions.
struct Tree_Sitter_Languages
{
// NOTE(review): presumably the arena the definitions are allocated from. Confirm.
Arena arena;
// Fixed-size table of definition lists.
// NOTE(review): looks like hash buckets keyed by extension_hash. Confirm.
Tree_Sitter_Language_Definition* languages[4096];
};
// Tree-sitter state attached to a buffer.
struct Buffer_Tree_Sitter_Data
{
// Syntax tree of the buffer.
TSTree* tree;
// NOTE(review): presumably guards `tree` against concurrent access. Confirm where it is locked.
System_Mutex tree_mutex;
// Code Index Update Requests
// Ranges recorded for the most recent edit: the range before the edit, the
// range after it, and (judging by the name) the range of the syntax node to re-index.
Range_i64 last_update_old_range;
Range_i64 last_update_new_range;
Range_i64 last_update_node_range;
};
// Bundles what is needed to run one query over one buffer's tree.
struct Tree_Sitter_Query_Cursor
{
Buffer_ID buffer_id;
TSQuery* query;
TSQueryCursor* query_cursor;
TSTree* tree;
// NOTE(review): presumably the node the query starts from. Confirm.
TSNode first_node;
// NOTE(review): presumably false when the cursor could not be set up. Confirm.
bool ok;
};
// Doubly linked stack entry referencing one nest.
// NOTE(review): presumably tracks nests that are still open while the code
// index is built. Confirm.
struct Code_Index_Nest_Stack
{
Code_Index_Nest_Stack* prev;
Code_Index_Nest_Stack* next;
Code_Index_Nest* nest;
// NOTE(review): presumably the id of the query match that produced this nest. Confirm.
u32 match_id;
};
// State carried through a parse of one buffer.
struct Tree_Sitter_Parse_State
{
Buffer_Tree_Sitter_Data* tree_data;
TSParser* parser;
// NOTE(review): presumably the previous tree, kept for incremental re-parsing. Confirm.
TSTree *old_tree;
};
// Working state of one code index update for a buffer. It is returned by
// tree_sitter_code_index_update_full_file.
struct Tree_Sitter_Code_Index_Update_State
{
Buffer_ID buffer_id;
String_Const_u8 buffer_contents;
Tree_Sitter_Language_Definition* language;
Arena index_arena;
Code_Index_File* index;
Code_Index_Note_List_New new_notes;
// Ends of the Code_Index_Nest_Stack list. Empty by default.
Code_Index_Nest_Stack* nest_stack_first = 0;
Code_Index_Nest_Stack* nest_stack_last = 0;
// Most recently produced delimiter and note. None by default.
Code_Index_Scope_Delim* last_delim = 0;
Code_Index_Note* last_note = 0;
// max_u32 serves as the "no match yet" default.
u32 last_note_match_id = max_u32;
// NOTE(review): presumably false when the update could not be carried out. Confirm.
b8 ok;
};
///////////////////////////////////////////////////////
// Globals
global Tree_Sitter_Languages tree_sitter_languages;
CUSTOM_ID(attachment, buffer_tree_sitter_data_id);
CUSTOM_ID(attachment, buffer_parse_task);
CUSTOM_ID(attachment, buffer_update_code_index_task);
// When true, default_tick runs tree_sitter_code_index_update_tick instead of
// code_index_update_tick.
b8 use_tree_sitter_code_indexing = true;
// NOTE(review): presumably switches token coloring to tree-sitter highlight
// queries. Confirm where it is read.
b8 use_tree_sitter_token_coloring = true;
///////////////////////////////////////////////////////
// API
function Tree_Sitter_Query_Color_Map* tree_sitter_get_language_query_color_map(Tree_Sitter_Language_Definition* lang, String_Const_u8 capture_name);
function void tree_sitter_set_language_query_color_map(Application_Links* app, Tree_Sitter_Language_Definition* lang, String_Const_u8 capture_name, String_Const_u8 color_id);
function void tree_sitter_languages_init_default_colors(Application_Links* app, Tree_Sitter_Language_Definition* lang);
function TSQuery* tree_sitter_query_new(Application_Links* app, TSLanguage* language, String_Const_u8 query_string);
function Tree_Sitter_Language_Definition* tree_sitter_register_language(String_Const_u8 ext, TSLanguage* language, Tree_Sitter_Language_Queries queries, Tree_Sitter_Language_Flags flags);
function Tree_Sitter_Language_Definition* tree_sitter_language_for_buffer(Application_Links* app, Buffer_ID buffer_id);
function Tree_Sitter_Code_Index_Update_State tree_sitter_code_index_update_full_file(Application_Links *app, Buffer_ID buffer_id);
function void tree_sitter_code_index_update_tick(Application_Links *app);
function void tree_sitter_code_index_update_async(Async_Context* actx, String_Const_u8 data);
function void print_tree_sitter_tree(TSNode cur_node, i32 level, const char* field);
#endif //FCODER_TREE_SITTER_H

View File

@ -35,8 +35,8 @@ debug=-g
opts="-Wno-macro-redefined -Wno-write-strings -Wno-null-dereference -Wno-comment -Wno-switch -Wno-missing-declarations -Wno-logical-op-parentheses -g -DOS_LINUX=1 -DOS_WINDOWS=0 -DOS_MAC=0"
pushd $dst
pushd $dst > /dev/null
g++ -I"$CUSTOM_ROOT" $opts $full_target -o one_time
popd
popd > /dev/null

View File

@ -0,0 +1,83 @@
/* date = July 11th 2025 6:02 pm */
#ifndef TREE_SITTER_BASH_H
#define TREE_SITTER_BASH_H
// Source: https://github.com/tree-sitter/tree-sitter-bash/blob/master/queries/highlights.scm
String_Const_u8 TS_BASH_HIGHLIGHT_QUERY = string_u8_litexpr(R"DONE(
[
(string)
(raw_string)
(heredoc_body)
(heredoc_start)
] @string
(command_name) @function.call
(variable_name) @property
[
"case"
"do"
"done"
"elif"
"else"
"esac"
"export"
"fi"
"for"
"function"
"if"
"in"
"select"
"then"
"unset"
"until"
"while"
] @keyword
(comment) @comment
(function_definition name: (word) @function)
(file_descriptor) @number
[
(command_substitution)
(process_substitution)
(expansion)
] @embedded
[
"$"
"&&"
">"
">>"
"<"
"|"
] @operator
(
(command (_) @constant)
(#match? @constant "^-")
)
)DONE");
extern "C" {
TSLanguage* tree_sitter_bash();
}
// Registers the bash grammar for ".sh" files with a highlights query only
// (no tags query, no flags) and gives it the default capture colors.
void
tree_sitter_register_bash(Application_Links* app)
{
    TSLanguage* bash = tree_sitter_bash();

    TSQuery* highlights = tree_sitter_query_new(app, bash, TS_BASH_HIGHLIGHT_QUERY);
    Tree_Sitter_Language_Queries bash_queries = {};
    bash_queries.ptr[Tree_Sitter_Language_Query_Highlights] = highlights;

    Tree_Sitter_Language_Flags no_flags = 0;
    Tree_Sitter_Language_Definition* definition =
        tree_sitter_register_language(SCu8("sh"), bash, bash_queries, no_flags);
    tree_sitter_languages_init_default_colors(app, definition);
}
#endif //TREE_SITTER_BASH_H

View File

@ -0,0 +1,232 @@
/* date = July 11th 2025 6:04 pm */
#ifndef TREE_SITTER_CPP_H
#define TREE_SITTER_CPP_H
// Source: https://github.com/tree-sitter/tree-sitter-cpp/blob/master/queries/tags.scm
String_Const_u8 TS_CPP_TAGS_QUERY_SCM = string_u8_litexpr(R"DONE(
(struct_specifier name: (type_identifier) @name body:(_)) @definition.class
(declaration type: (union_specifier name: (type_identifier) @name)) @definition.class
(function_declarator declarator: (identifier) @name) @definition.function
(function_declarator declarator: (field_identifier) @name) @definition.function
(function_declarator declarator: (qualified_identifier scope: (namespace_identifier) @local.scope name: (identifier) @name)) @definition.method
(type_definition declarator: (type_identifier) @name) @definition.type
(enum_specifier name: (type_identifier) @name) @definition.type
(class_specifier name: (type_identifier) @name) @definition.class
"{" @scope_begin
"(" @scope_begin
"[" @scope_begin
"}" @scope_end
")" @scope_end
"]" @scope_end
; (_ "{" @scope_begin "}" @scope_end )
; (_ "(" @scope_begin ")" @scope_end )
; (_ "[" @scope_begin "]" @scope_end )
)DONE");
// Source: https://github.com/tree-sitter/tree-sitter-cpp/blob/master/queries/highlights.scm
String_Const_u8 TS_CPP_HIGHLIGHT_QUERY_SCM = string_u8_litexpr(R"DONE(
(identifier) @variable
((identifier) @constant
(#match? @constant "^[A-Z][A-Z\\d_]*$"))
"break" @keyword
"case" @keyword
"const" @keyword
"continue" @keyword
"default" @keyword
"do" @keyword
"else" @keyword
"enum" @keyword
"extern" @keyword
"for" @keyword
"if" @keyword
"inline" @keyword
"return" @keyword
"sizeof" @keyword
"static" @keyword
"struct" @keyword
"switch" @keyword
"typedef" @keyword
"union" @keyword
"volatile" @keyword
"while" @keyword
"#define" @keyword
"#elif" @keyword
"#else" @keyword
"#endif" @keyword
"#if" @keyword
"#ifdef" @keyword
"#ifndef" @keyword
"#include" @keyword
(preproc_directive) @keyword
"--" @operator
"-" @operator
"-=" @operator
"->" @operator
"=" @operator
"!=" @operator
"*" @operator
"&" @operator
"&&" @operator
"+" @operator
"++" @operator
"+=" @operator
"<" @operator
"==" @operator
">" @operator
"||" @operator
"." @delimiter
";" @delimiter
(string_literal) @string
(system_lib_string) @string
(null) @constant
(number_literal) @number
(char_literal) @number
(field_identifier) @property
(statement_identifier) @label
(type_identifier) @type
(primitive_type) @type
(sized_type_specifier) @type
(call_expression
function: (identifier) @function)
(call_expression
function: (field_expression
field: (field_identifier) @function))
(function_declarator
declarator: (identifier) @function)
(preproc_function_def
name: (identifier) @function.special)
(comment) @comment
; Functions
(call_expression
function: (qualified_identifier
name: (identifier) @function))
(template_function
name: (identifier) @function)
(template_method
name: (field_identifier) @function)
(template_function
name: (identifier) @function)
(function_declarator
declarator: (qualified_identifier
name: (identifier) @function))
(function_declarator
declarator: (field_identifier) @function)
; Types
((namespace_identifier) @type
(#match? @type "^[A-Z]"))
(auto) @type
; Constants
(this) @variable.builtin
(null "nullptr" @constant)
; Modules
(module_name
(identifier) @module)
; Keywords
[
"catch"
"class"
"co_await"
"co_return"
"co_yield"
"constexpr"
"constinit"
"consteval"
"delete"
"explicit"
"final"
"friend"
"mutable"
"namespace"
"noexcept"
"new"
"override"
"private"
"protected"
"public"
"template"
"throw"
"try"
"typename"
"using"
"concept"
"requires"
"virtual"
"import"
"export"
"module"
] @keyword
; Strings
(raw_string_literal) @string
)DONE");
extern "C" {
TSLanguage *tree_sitter_cpp();
}
// Registers the C++ grammar, with its highlights and tags queries and virtual
// indentation enabled, for every C/C++ file extension. Each registration gets
// the default capture colors.
void
tree_sitter_register_cpp(Application_Links* app)
{
    TSLanguage* cpp = tree_sitter_cpp();

    Tree_Sitter_Language_Queries cpp_queries = {};
    cpp_queries.ptr[Tree_Sitter_Language_Query_Highlights] = tree_sitter_query_new(app, cpp, TS_CPP_HIGHLIGHT_QUERY_SCM);
    cpp_queries.ptr[Tree_Sitter_Language_Query_Tags] = tree_sitter_query_new(app, cpp, TS_CPP_TAGS_QUERY_SCM);

    Tree_Sitter_Language_Flags cpp_flags = Tree_Sitter_Language_Can_Receive_Virtual_Indent;

    // Same grammar and queries for each extension, registered in this order.
    String_Const_u8 extensions[] = {
        SCu8("c"),
        SCu8("cpp"),
        SCu8("h"),
        SCu8("hpp"),
        SCu8("cc"),
    };
    for (int ext_index = 0; ext_index < ArrayCount(extensions); ext_index += 1)
    {
        Tree_Sitter_Language_Definition* definition =
            tree_sitter_register_language(extensions[ext_index], cpp, cpp_queries, cpp_flags);
        tree_sitter_languages_init_default_colors(app, definition);
    }
}
#endif //TREE_SITTER_CPP_H

View File

@ -0,0 +1,314 @@
/* date = July 11th 2025 6:03 pm */
#ifndef TREE_SITTER_JAI_H
#define TREE_SITTER_JAI_H
String_Const_u8 TS_JAI_TAGS_QUERY = string_u8_litexpr(R"DONE(
(procedure_declaration
name: (identifier) @definition.function
)
(struct_declaration
name: (identifier) @definition.type
)
(enum_declaration
name: (identifier) @definition.type
)
("{" @scope_begin "}" @scope_end )
("(" @scope_begin ")" @scope_end )
("[" @scope_begin "]" @scope_end )
)DONE");
// NOTE(PS): source: https://github.com/St0wy/tree-sitter-jai/blob/main/queries/highlights.scm
String_Const_u8 TS_JAI_HIGHLIGHT_QUERY = string_u8_litexpr(R"DONE(
; Includes
[
(import)
(load)
] @include
; Keywords
[
; from modules/Jai_Lexer
"if"
"xx"
"ifx"
"for"
"then"
"else"
"null"
"case"
"enum"
"true"
"cast"
"while"
"break"
"using"
"defer"
"false"
"union"
"return"
"struct"
"inline"
"remove"
; "size_of"
"type_of"
; "code_of"
; "context"
"continue"
"operator"
; "type_info"
"no_inline"
"interface"
"enum_flags"
; "is_constant"
"push_context"
; "initializer_of"
] @keyword
[
"return"
] @keyword.return
[
"if"
"else"
"case"
"break"
] @keyword.conditional
((if_expression
[
"then"
"ifx"
"else"
] @keyword.conditional.ternary)
(#set! "priority" 105))
; Repeats
[
"for"
"while"
"continue"
] @keyword.repeat
; Variables
; (identifier) @variable
name: (identifier) @variable
argument: (identifier) @variable
named_argument: (identifier) @variable
(member_expression (identifier) @variable)
(parenthesized_expression (identifier) @variable)
((identifier) @variable.builtin
(#any-of? @variable.builtin "context"))
; Namespaces
(import (identifier) @namespace)
; Parameters
(parameter (identifier) @parameter ":" "="? (identifier)? @constant)
; (call_expression argument: (identifier) @parameter "=")
; Functions
; (procedure_declaration (identifier) @function (procedure (block)))
(procedure_declaration (identifier) @function (block))
(call_expression function: (identifier) @function.call)
; Types
type: (types) @type
type: (identifier) @type
((types) @type)
modifier: (identifier) @keyword
keyword: (identifier) @keyword
((types (identifier) @type.builtin)
(#any-of? @type.builtin
"bool" "int" "string"
"s8" "s16" "s32" "s64"
"u8" "u16" "u32" "u64"
"Type" "Any"))
(struct_declaration (identifier) @type ":" ":")
(enum_declaration (identifier) @type ":" ":")
; (const_declaration (identifier) @type ":" ":" [(array_type) (pointer_type)])
; ; I don't like this
; ((identifier) @type
; (#lua-match? @type "^[A-Z][a-zA-Z0-9]*$")
; (#not-has-parent? @type parameter procedure_declaration call_expression))
; Fields
(member_expression "." (identifier) @field)
(assignment_statement (identifier) @field "="?)
(update_statement (identifier) @field)
; Constants
((identifier) @constant
(#lua-match? @constant "^_*[A-Z][A-Z0-9_]*$")
(#not-has-parent? @constant type parameter))
(member_expression . "." (identifier) @constant)
(enum_declaration "{" (identifier) @constant)
; Literals
(integer) @number
(float) @number
(string) @string
;(character) @character
(string (escape_sequence) @string.escape)
(boolean) @boolean
[
(uninitialized)
(null)
] @constant.builtin
; Operators
[
":"
"="
"+"
"-"
"*"
"/"
"%"
">"
">="
"<"
"<="
"=="
"!="
"|"
"~"
"&"
"&~"
"<<"
">>"
"<<<"
">>>"
"||"
"&&"
"!"
".."
"+="
"-="
"*="
"/="
"%="
"&="
"|="
"^="
"<<="
">>="
"<<<="
">>>="
"||="
"&&="
] @operator
; Punctuation
[ "{" "}" ] @punctuation.bracket
[ "(" ")" ] @punctuation.bracket
[ "[" "]" ] @punctuation.bracket
[
"`"
"->"
"."
","
":"
";"
] @punctuation.delimiter
; Comments
[
(block_comment)
(comment)
] @comment @spell
; Errors
(ERROR) @error
(block_comment) @comment
directive: ("#") @keyword ; #if
type: ("type_of") @type
(compiler_directive) @keyword
(heredoc_start) @none
(heredoc_end) @none
(heredoc_body) @string
(note) @string
)DONE");
extern "C" {
TSLanguage *tree_sitter_jai();
}
// Registers the Jai grammar for ".jai" files with its highlights and tags
// queries and virtual indentation enabled. Instead of the default capture
// colors, Jai supplies its own capture-name -> color-id overrides.
void
tree_sitter_register_jai (Application_Links* app)
{
    TSLanguage* language = tree_sitter_jai();
    Tree_Sitter_Language_Queries queries = {};
    queries.ptr[Tree_Sitter_Language_Query_Highlights] = tree_sitter_query_new(app, language, TS_JAI_HIGHLIGHT_QUERY);
    queries.ptr[Tree_Sitter_Language_Query_Tags] = tree_sitter_query_new(app, language, TS_JAI_TAGS_QUERY);
    Tree_Sitter_Language_Flags flags = (
        Tree_Sitter_Language_Can_Receive_Virtual_Indent
    );
    Tree_Sitter_Language_Definition* lang;
    lang = tree_sitter_register_language(SCu8("jai"), language, queries, flags);

    // Language-specific color overrides, keyed by query capture name.
    tree_sitter_set_language_query_color_map(app, lang, SCu8("function"), SCu8("defcolor_function"));
    tree_sitter_set_language_query_color_map(app, lang, SCu8("function.call"), SCu8("defcolor_function"));
    tree_sitter_set_language_query_color_map(app, lang, SCu8("type"), SCu8("defcolor_type"));
    // The color id was previously misspelled as "defcolor_int_constnat", which names no color.
    tree_sitter_set_language_query_color_map(app, lang, SCu8("constant"), SCu8("defcolor_int_constant"));
    tree_sitter_set_language_query_color_map(app, lang, SCu8("keyword"), SCu8("defcolor_keyword"));
    tree_sitter_set_language_query_color_map(app, lang, SCu8("string"), SCu8("defcolor_str_constant"));
    tree_sitter_set_language_query_color_map(app, lang, SCu8("comment"), SCu8("defcolor_comment"));
}
#endif //TREE_SITTER_JAI_H

View File

@ -0,0 +1,73 @@
/*
tree_sitter_language_base.h
This file is a template from which you can set up new languages for syntax highlighting,
go-to-definition, and virtual whitespace in 4coder.
BEFORE YOU START: go read the Adding a Language instructions in README.md
1. find and replace "NEWLANG" with <your language identifier>
Example: "NEWLANG" -> "RUST" and "newlang" -> "rust"
2. Go through each TODO in this file and complete it
3. Include this file from "code/custom/4coder_tree_sitter.cpp"
4. Add "tree_sitter_register_newlang(app);" to 4coder_tree_sitter.cpp::register_all_languages()
5. Compile and run
If you are confused about what to do, go look at tree_sitter_cpp.h as a working example.
*/
#ifndef TREE_SITTER_LANGUAGE_BASE_H
#define TREE_SITTER_LANGUAGE_BASE_H
// File extensions (without the dot) that should be treated as this language.
// C++ array initializers use braces, not brackets.
String_Const_u8 TS_NEWLANG_EXTENSIONS[] = {
    // TODO(PS): fill out this array with the extensions you want to be treated
    // as this language.
    SCu8("ext1"),
    SCu8("ext2"),
};
String_Const_u8 TS_NEWLANG_TAGS_QUERY_SCM = string_u8_litexpr(R"DONE(
; TODO - paste your grammars tags query here
; Important - if you want virtual indentation, leave these tag queries here
(_ "{" @scope_begin "}" @scope_end )
(_ "(" @scope_begin ")" @scope_end )
(_ "[" @scope_begin "]" @scope_end )
)DONE");
String_Const_u8 TS_NEWLANG_HIGHLIGHT_QUERY_SCM = string_u8_litexpr(R"DONE(
; TODO - paste your grammars highlights query here
)DONE");
// NOTE(PS): depending on how you built your scanner, it might not need to be inside an extern "C" block
extern "C" {
    // Entry point exported by the grammar. It must be declared as a function,
    // because tree_sitter_register_newlang calls it.
    TSLanguage* tree_sitter_newlang();
}
// Registers the new language's grammar and queries for every extension in
// TS_NEWLANG_EXTENSIONS and gives each registration the default capture colors,
// matching what the bash and C++ registrations do.
void
tree_sitter_register_newlang(Application_Links* app)
{
    TSLanguage* language = tree_sitter_newlang();
    Tree_Sitter_Language_Queries queries = {};
    queries.ptr[Tree_Sitter_Language_Query_Highlights] = tree_sitter_query_new(app, language, TS_NEWLANG_HIGHLIGHT_QUERY_SCM);
    queries.ptr[Tree_Sitter_Language_Query_Tags] = tree_sitter_query_new(app, language, TS_NEWLANG_TAGS_QUERY_SCM);
    // TODO(PS): set this to zero if your language can not make use of virtual indentation (like python)
    Tree_Sitter_Language_Flags flags = (
        Tree_Sitter_Language_Can_Receive_Virtual_Indent
    );
    for (int i = 0; i < ArrayCount(TS_NEWLANG_EXTENSIONS); i++)
    {
        String_Const_u8 ext = TS_NEWLANG_EXTENSIONS[i];
        Tree_Sitter_Language_Definition* lang = tree_sitter_register_language(ext, language, queries, flags);
        // Without this the language has no capture -> color mappings. Replace it
        // with tree_sitter_set_language_query_color_map calls to supply your own colors.
        tree_sitter_languages_init_default_colors(app, lang);
    }
}
#endif //TREE_SITTER_LANGUAGE_BASE_H

View File

@ -0,0 +1,313 @@
/* date = July 13th 2025 11:38 am */
#ifndef TREE_SITTER_TS_H
#define TREE_SITTER_TS_H
// Tags query for TypeScript. tree_sitter_register_ts() compiles it into the
// Tree_Sitter_Language_Query_Tags slot. Definitions/references are captured as
// @name inside @definition.* / @reference.* patterns; the last three patterns
// capture bracket pairs as @scope_begin / @scope_end.
String_Const_u8 TS_TS_TAGS_QUERY = string_u8_litexpr(R"DONE(
(function_declaration
name: (identifier) @name) @definition.function
(function_signature
name: (identifier) @name) @definition.function
(variable_declarator
name: (identifier) @name
value: (arrow_function)) @definition.function
(method_signature
name: (property_identifier) @name) @definition.method
(abstract_method_signature
name: (property_identifier) @name) @definition.method
(abstract_class_declaration
name: (type_identifier) @name) @definition.class
(module
name: (identifier) @name) @definition.module
(interface_declaration
name: (type_identifier) @name) @definition.type
(type_alias_declaration
name: (type_identifier) @name) @definition.type
(type_annotation
(type_identifier) @name) @reference.type
(new_expression
constructor: (identifier) @name) @reference.class
(_ "{" @scope_begin "}" @scope_end )
(_ "(" @scope_begin ")" @scope_end )
(_ "[" @scope_begin "]" @scope_end )
)DONE");
// Highlights query for TypeScript. tree_sitter_register_ts() compiles it into the
// Tree_Sitter_Language_Query_Highlights slot. The text has two parts: patterns
// marked JAVASCRIPT followed by patterns marked TYPESCRIPT.
// NOTE(review): "export" appears in both keyword lists; both map to @keyword.
String_Const_u8 TS_TS_HIGHLIGHT_QUERY = string_u8_litexpr(R"DONE(
;;;;;;;;;;;;;;;;; JAVASCRIPT ;;;;;;;;;;;;;;;;;
; Variables
;----------
(identifier) @variable
; Properties
;-----------
(property_identifier) @property
; Function and method definitions
;--------------------------------
(function_expression
name: (identifier) @function)
(function_declaration
name: (identifier) @function)
(method_definition
name: (property_identifier) @function.method)
(pair
key: (property_identifier) @function.method
value: [(function_expression) (arrow_function)])
(assignment_expression
left: (member_expression
property: (property_identifier) @function.method)
right: [(function_expression) (arrow_function)])
(variable_declarator
name: (identifier) @function
value: [(function_expression) (arrow_function)])
(assignment_expression
left: (identifier) @function
right: [(function_expression) (arrow_function)])
; Function and method calls
;--------------------------
(call_expression
function: (identifier) @function)
(call_expression
function: (member_expression
property: (property_identifier) @function.method))
; Special identifiers
;--------------------
((identifier) @constructor
(#match? @constructor "^[A-Z]"))
([
(identifier)
(shorthand_property_identifier)
(shorthand_property_identifier_pattern)
] @constant
(#match? @constant "^[A-Z_][A-Z\\d_]+$"))
((identifier) @variable.builtin
(#match? @variable.builtin "^(arguments|module|console|window|document)$")
(#is-not? local))
((identifier) @function.builtin
(#eq? @function.builtin "require")
(#is-not? local))
; Literals
;---------
(this) @variable.builtin
(super) @variable.builtin
[
(true)
(false)
(null)
(undefined)
] @constant.builtin
(comment) @comment
[
(string)
(template_string)
] @string
(regex) @string.special
(number) @number
; Tokens
;-------
[
";"
(optional_chain)
"."
","
] @punctuation.delimiter
[
"-"
"--"
"-="
"+"
"++"
"+="
"*"
"*="
"**"
"**="
"/"
"/="
"%"
"%="
"<"
"<="
"<<"
"<<="
"="
"=="
"==="
"!"
"!="
"!=="
"=>"
">"
">="
">>"
">>="
">>>"
">>>="
"~"
"^"
"&"
"|"
"^="
"&="
"|="
"&&"
"||"
"??"
"&&="
"||="
"??="
] @operator
[
"("
")"
"["
"]"
"{"
"}"
] @punctuation.bracket
(template_substitution
"${" @punctuation.special
"}" @punctuation.special) @embedded
[
"as"
"async"
"await"
"break"
"case"
"catch"
"class"
"const"
"continue"
"debugger"
"default"
"delete"
"do"
"else"
"export"
"extends"
"finally"
"for"
"from"
"function"
"get"
"if"
"import"
"in"
"instanceof"
"let"
"new"
"of"
"return"
"set"
"static"
"switch"
"target"
"throw"
"try"
"typeof"
"var"
"void"
"while"
"with"
"yield"
] @keyword
;;;;;;;;;;;;;;;;; TYPESCRIPT ;;;;;;;;;;;;;;;;;
; Types
(type_identifier) @type
(predefined_type) @type.builtin
((identifier) @type
(#match? @type "^[A-Z]"))
(type_arguments
"<" @punctuation.bracket
">" @punctuation.bracket)
; Variables
(required_parameter (identifier) @variable.parameter)
(optional_parameter (identifier) @variable.parameter)
; Keywords
[ "abstract"
"declare"
"enum"
"export"
"implements"
"interface"
"keyof"
"namespace"
"private"
"protected"
"public"
"type"
"readonly"
"override"
"satisfies"
] @keyword
)DONE");
extern "C" {
TSLanguage* tree_sitter_typescript();
}
// Registers the TypeScript grammar for the "ts" extension: builds the
// highlight and tags queries, registers the language with virtual indentation
// enabled, and applies the default colors to the resulting definition.
void
tree_sitter_register_ts(Application_Links* app)
{
    TSLanguage* ts_language = tree_sitter_typescript();

    Tree_Sitter_Language_Queries ts_queries = {};
    ts_queries.ptr[Tree_Sitter_Language_Query_Highlights] =
        tree_sitter_query_new(app, ts_language, TS_TS_HIGHLIGHT_QUERY);
    ts_queries.ptr[Tree_Sitter_Language_Query_Tags] =
        tree_sitter_query_new(app, ts_language, TS_TS_TAGS_QUERY);

    Tree_Sitter_Language_Flags ts_flags = (Tree_Sitter_Language_Can_Receive_Virtual_Indent);

    Tree_Sitter_Language_Definition* definition =
        tree_sitter_register_language(SCu8("ts"), ts_language, ts_queries, ts_flags);
    tree_sitter_languages_init_default_colors(app, definition);
}
#endif //TREE_SITTER_TS_H

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,152 @@
#include "tree_sitter/alloc.h"
#include "tree_sitter/parser.h"
#include <assert.h>
#include <string.h>
#include <wctype.h>
// External token kinds handled by this scanner. The values index the
// valid_symbols array and are written to lexer->result_symbol.
enum TokenType { RAW_STRING_DELIMITER, RAW_STRING_CONTENT };
/// The spec limits delimiters to 16 chars
#define MAX_DELIMITER_LENGTH 16
// Scanner state: the delimiter recorded while scanning an opening raw-string
// delimiter. delimiter_length == 0 means no delimiter is currently recorded,
// which is how scan_raw_string_delimiter tells opening from closing.
typedef struct {
uint8_t delimiter_length;
wchar_t delimiter[MAX_DELIMITER_LENGTH];
} Scanner;
/// Consume the current lookahead character as part of the token.
static inline void advance(TSLexer *lexer) {
    lexer->advance(lexer, false);
}
/// Forget the recorded delimiter: zero the character buffer and its length.
static inline void reset(Scanner *scanner) {
    memset(scanner->delimiter, 0, sizeof scanner->delimiter);
    scanner->delimiter_length = 0;
}
/// Scan the raw string delimiter in R"delimiter(content)delimiter".
///
/// With a delimiter already recorded this matches the closing delimiter and
/// clears the scanner state; otherwise it records the opening delimiter.
/// Returns true when a delimiter token was recognized.
static bool scan_raw_string_delimiter(Scanner *scanner, TSLexer *lexer) {
    if (scanner->delimiter_length > 0) {
        // Closing delimiter: must exactly match the opening delimiter.
        // We already checked this when scanning content, but this is how we
        // know when to stop. We can't stop at ", because R"""hello""" is valid.
        int matched = 0;
        while (matched < scanner->delimiter_length) {
            if (lexer->lookahead != scanner->delimiter[matched]) {
                return false;
            }
            advance(lexer);
            matched++;
        }
        reset(scanner);
        return true;
    }

    // Opening delimiter: record the d-char-sequence up to (.
    // d-char is any basic character except parens, backslashes, and spaces.
    while (true) {
        if (scanner->delimiter_length >= MAX_DELIMITER_LENGTH || lexer->eof(lexer) ||
            lexer->lookahead == '\\' || iswspace(lexer->lookahead)) {
            return false;
        }
        if (lexer->lookahead == '(') {
            // Rather than create a token for an empty delimiter, we fail and
            // let the grammar fall back to a delimiter-less rule.
            return scanner->delimiter_length > 0;
        }
        scanner->delimiter[scanner->delimiter_length] = lexer->lookahead;
        scanner->delimiter_length++;
        advance(lexer);
    }
}
/// Scan the raw string content in R"delimiter(content)delimiter".
///
/// The token ends just before the `)delimiter"` terminator, or at EOF.
static bool scan_raw_string_content(Scanner *scanner, TSLexer *lexer) {
    // How many delimiter characters have matched since the last ')';
    // -1 means no candidate terminator is in progress. The delimiter may not
    // contain ')' so a single counter suffices.
    int progress = -1;

    for (;;) {
        // If we hit EOF, consider the content to terminate there.
        // This forms an incomplete raw_string_literal, and models the code
        // well.
        if (lexer->eof(lexer)) {
            lexer->mark_end(lexer);
            return true;
        }

        if (progress >= 0) {
            if (progress == scanner->delimiter_length) {
                // Whole delimiter matched: only a closing quote completes it.
                if (lexer->lookahead == '"') {
                    return true;
                }
                progress = -1;
            } else if (lexer->lookahead == scanner->delimiter[progress]) {
                progress += 1;
            } else {
                progress = -1;
            }
        }

        if (progress == -1 && lexer->lookahead == ')') {
            // The content doesn't include the )delimiter" part.
            // We must still scan through it, but exclude it from the token.
            lexer->mark_end(lexer);
            progress = 0;
        }

        advance(lexer);
    }
}
extern "C" {
void *tree_sitter_cpp_external_scanner_create() {
Scanner *scanner = (Scanner *)ts_calloc(1, sizeof(Scanner));
memset(scanner, 0, sizeof(Scanner));
return scanner;
}
bool tree_sitter_cpp_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
Scanner *scanner = (Scanner *)payload;
if (valid_symbols[RAW_STRING_DELIMITER] && valid_symbols[RAW_STRING_CONTENT]) {
// we're in error recovery
return false;
}
// No skipping leading whitespace: raw-string grammar is space-sensitive.
if (valid_symbols[RAW_STRING_DELIMITER]) {
lexer->result_symbol = RAW_STRING_DELIMITER;
return scan_raw_string_delimiter(scanner, lexer);
}
if (valid_symbols[RAW_STRING_CONTENT]) {
lexer->result_symbol = RAW_STRING_CONTENT;
return scan_raw_string_content(scanner, lexer);
}
return false;
}
unsigned tree_sitter_cpp_external_scanner_serialize(void *payload, char *buffer) {
static_assert(MAX_DELIMITER_LENGTH * sizeof(wchar_t) < TREE_SITTER_SERIALIZATION_BUFFER_SIZE,
"Serialized delimiter is too long!");
Scanner *scanner = (Scanner *)payload;
size_t size = scanner->delimiter_length * sizeof(wchar_t);
memcpy(buffer, scanner->delimiter, size);
return (unsigned)size;
}
/// Restore the scanner state from bytes produced by the serialize function.
///
/// payload - the Scanner to overwrite.
/// buffer  - serialized delimiter characters (may be unused when length is 0).
/// length  - number of bytes in buffer; 0 means "no delimiter recorded".
///
/// The state is always cleared first. A length that is not a whole number of
/// wchar_t or that exceeds the delimiter buffer leaves the state cleared
/// instead of overrunning scanner->delimiter (the assert alone does not
/// protect release builds).
void tree_sitter_cpp_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) {
    assert(length % sizeof(wchar_t) == 0 && "Can't decode serialized delimiter!");

    Scanner *scanner = (Scanner *)payload;
    scanner->delimiter_length = 0;
    memset(scanner->delimiter, 0, sizeof scanner->delimiter);

    if (length == 0 || length % sizeof(wchar_t) != 0 || length > sizeof scanner->delimiter) {
        return;
    }

    memcpy(&scanner->delimiter[0], buffer, length);
    scanner->delimiter_length = (uint8_t)(length / sizeof(wchar_t));
}
void tree_sitter_cpp_external_scanner_destroy(void *payload) {
Scanner *scanner = (Scanner *)payload;
ts_free(scanner);
}
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,283 @@
#include <tree_sitter/alloc.h>
#include <tree_sitter/array.h>
#include <tree_sitter/parser.h>
#include <string.h>
#include <wchar.h>
#include <wctype.h>
// External token kinds for the Jai scanner. The values index valid_symbols
// and are written to lexer->result_symbol.
typedef enum {
// IDENTIFIER,
// Opening word of a heredoc; pushed onto Scanner.heredocs when scanned.
HEREDOC_START,
// Word matching the most recently opened heredoc; pops it when scanned.
HEREDOC_END,
// When this entry is valid the scan function returns false immediately
// (treated as error recovery).
ERROR_SENTINEL,
} TokenType;
// typedef Array(char) String;
typedef Array(int32_t) String32;
// Returns true when both strings have the same length and identical code points.
static inline bool string_eq(String32 *self, String32 *other) {
    const uint32_t count = self->size;
    if (count != other->size) {
        return false;
    }
    // Two empty strings are equal; nothing to compare.
    if (count == 0) {
        return true;
    }
    return 0 == memcmp(self->contents, other->contents, count * sizeof(self->contents[0]));
}
// Scanner state: the words of the heredocs that are currently open. New words
// are pushed at the back and the back entry is the one HEREDOC_END must match.
typedef struct {
Array(String32) heredocs;
} Scanner;
// NOTE(review): not referenced anywhere in the visible part of this file.
typedef enum { ERROR, END } ScanContentResult;
// Frees the storage of one heredoc word via array_delete. The entry itself
// stays in whatever list holds it; only its contents are released.
static inline void reset_heredoc(String32 *heredoc) {
array_delete(heredoc);
}
// Consume the lookahead character as part of the token; does nothing at EOF.
static inline void advance(TSLexer *lexer) {
    if (lexer->eof(lexer)) {
        return;
    }
    lexer->advance(lexer, false);
}
// Consume the lookahead character as skipped input; does nothing at EOF.
static inline void skip(TSLexer *lexer) {
    if (lexer->eof(lexer)) {
        return;
    }
    lexer->advance(lexer, true);
}
// True when `c` may begin an identifier: an underscore, any code point at or
// above 0x80, or a character iswalpha() accepts.
static inline bool starts_identifier(int32_t c) {
    if (c == '_' || c >= 0x80) {
        return true;
    }
    return iswalpha(c) != 0;
}
// True when `c` may appear after the first character of an identifier: an
// underscore, any code point at or above 0x80, or a character iswalnum() accepts.
static inline bool continues_identifier(int32_t c) {
    if (c == '_' || c >= 0x80) {
        return true;
    }
    return iswalnum(c) != 0;
}
// Reads the longest run of identifier characters at the lexer position and
// returns it as a freshly allocated String32 (empty when none are present).
// The caller owns the result and must release it with array_delete.
static String32 scan_heredoc_word(TSLexer *lexer) {
    // Plain brace initialization, as used elsewhere in this file; the cast form
    // `(String32)array_new()` is a compound literal, which C++ does not have.
    String32 result = array_new();
    while (continues_identifier(lexer->lookahead)) {
        array_push(&result, lexer->lookahead);
        advance(lexer);
    }
    return result;
}
/*
static int check_for_keyword(String ident) {
switch (ident.size) {
case 2:
if (strncmp(ident.contents, "if", 2) == 0) return 1;
if (strncmp(ident.contents, "xx", 2) == 0) return 1;
return 0;
case 3:
if (strncmp(ident.contents, "ifx", 3) == 0) return 1;
if (strncmp(ident.contents, "for", 3) == 0) return 1;
return 0;
case 4:
if (strncmp(ident.contents, "then", 4) == 0) return 1;
if (strncmp(ident.contents, "else", 4) == 0) return 1;
if (strncmp(ident.contents, "null", 4) == 0) return 1;
if (strncmp(ident.contents, "case", 4) == 0) return 1;
if (strncmp(ident.contents, "enum", 4) == 0) return 1;
if (strncmp(ident.contents, "true", 4) == 0) return 1;
if (strncmp(ident.contents, "cast", 4) == 0) return 1;
return 0;
case 5:
if (strncmp(ident.contents, "while", 5) == 0) return 1;
if (strncmp(ident.contents, "break", 5) == 0) return 1;
if (strncmp(ident.contents, "using", 5) == 0) return 1;
if (strncmp(ident.contents, "defer", 5) == 0) return 1;
if (strncmp(ident.contents, "false", 5) == 0) return 1;
if (strncmp(ident.contents, "union", 5) == 0) return 1;
return 0;
case 6:
if (strncmp(ident.contents, "return", 6) == 0) return 1;
if (strncmp(ident.contents, "struct", 6) == 0) return 1;
if (strncmp(ident.contents, "remove", 6) == 0) return 1;
if (strncmp(ident.contents, "inline", 6) == 0) return 1;
return 0;
case 7:
if (strncmp(ident.contents, "size_of", 7) == 0) return 1;
if (strncmp(ident.contents, "type_of", 7) == 0) return 1;
if (strncmp(ident.contents, "code_of", 7) == 0) return 1;
if (strncmp(ident.contents, "context", 7) == 0) return 1;
return 0;
case 8:
if (strncmp(ident.contents, "continue", 8) == 0) return 1;
if (strncmp(ident.contents, "operator", 8) == 0) return 1;
return 0;
case 9:
if (strncmp(ident.contents, "type_info", 9) == 0) return 1;
if (strncmp(ident.contents, "no_inline", 9) == 0) return 1;
if (strncmp(ident.contents, "interface", 9) == 0) return 1;
return 0;
case 10:
if (strncmp(ident.contents, "enum_flags", 10) == 0) return 1;
return 0;
case 11:
if (strncmp(ident.contents, "is_constant", 11) == 0) return 1;
return 0;
case 12:
if (strncmp(ident.contents, "push_context", 12) == 0) return 1;
return 0;
case 14:
if (strncmp(ident.contents, "initializer_of", 14) == 0) return 1;
return 0;
default:
return 0;
}
return 0;
}
*/
extern "C" {
bool tree_sitter_jai_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
const bool is_error_recovery = valid_symbols[ERROR_SENTINEL];
if (is_error_recovery)
return false;
lexer->mark_end(lexer);
/*
if (valid_symbols[IDENTIFIER]) {
lexer->result_symbol = IDENTIFIER;
while (iswspace(lexer->lookahead))
skip(lexer);
if (starts_identifier(lexer->lookahead)) {
String ident = (String)array_new();
array_push(&ident, lexer->lookahead);
advance(lexer);
while (1) {
if (continues_identifier(lexer->lookahead)) {
array_push(&ident, lexer->lookahead);
advance(lexer);
continue;
} else if (lexer->lookahead == '\\') {
advance(lexer);
while (iswspace(lexer->lookahead)) {
advance(lexer);
}
continue;
}
break;
}
bool keyword = check_for_keyword(ident);
array_delete(&ident);
lexer->mark_end(lexer);
return !keyword;
}
return false;
}
*/
Scanner *scanner = (Scanner *)payload;
if (valid_symbols[HEREDOC_END]) {
lexer->result_symbol = HEREDOC_END;
if (scanner->heredocs.size == 0)
return false;
String32 heredoc = *array_back(&scanner->heredocs);
while (iswspace(lexer->lookahead))
skip(lexer);
String32 word = scan_heredoc_word(lexer);
if (!string_eq(&word, &heredoc)) {
array_delete(&word);
return false;
}
array_delete(&word);
lexer->mark_end(lexer);
array_delete(&array_pop(&scanner->heredocs));
return true;
}
if (valid_symbols[HEREDOC_START]) {
lexer->result_symbol = HEREDOC_START;
String32 heredoc = array_new();
while (iswspace(lexer->lookahead))
skip(lexer);
heredoc = scan_heredoc_word(lexer);
if (heredoc.size == 0) {
array_delete(&heredoc);
return false;
}
lexer->mark_end(lexer);
array_push(&scanner->heredocs, heredoc);
return true;
}
return false;
}
void *tree_sitter_jai_external_scanner_create() {
Scanner *scanner = (Scanner*)ts_calloc(1, sizeof(Scanner));
array_init(&scanner->heredocs);
return scanner;
}
unsigned tree_sitter_jai_external_scanner_serialize(void *payload, char *buffer) {
Scanner *scanner = (Scanner *)payload;
unsigned size = 0;
buffer[size++] = (char)scanner->heredocs.size;
for (unsigned j = 0; j < scanner->heredocs.size; j++) {
String32 *heredoc = &scanner->heredocs.contents[j];
unsigned word_size = heredoc->size * sizeof(heredoc->contents[0]);
if (size + 5 + word_size >= TREE_SITTER_SERIALIZATION_BUFFER_SIZE)
return 0;
memcpy(&buffer[size], &heredoc->size, sizeof(int32_t));
size += sizeof(int32_t);
if (heredoc->size > 0) {
memcpy(&buffer[size], heredoc->contents, word_size);
size += word_size;
}
}
return size;
}
void tree_sitter_jai_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) {
Scanner *scanner = (Scanner *)payload;
unsigned size = 0;
for (uint32_t i = 0; i < scanner->heredocs.size; i++)
reset_heredoc(array_get(&scanner->heredocs, i));
if (length == 0)
return;
uint8_t open_heredoc_count = buffer[size++];
for (unsigned i = 0; i < open_heredoc_count; i++) {
String32 *heredoc = NULL;
if (i < scanner->heredocs.size) {
heredoc = array_get(&scanner->heredocs, i);
} else {
String32 new_heredoc = array_new();
array_push(&scanner->heredocs, new_heredoc);
heredoc = array_back(&scanner->heredocs);
}
memcpy(&heredoc->size, &buffer[size], sizeof(int32_t));
size += sizeof(int32_t);
unsigned word_size = heredoc->size * sizeof(heredoc->contents[0]);
if (word_size > 0) {
array_reserve(heredoc, heredoc->size);
memcpy(heredoc->contents, &buffer[size], word_size);
size += word_size;
}
}
assert(size == length);
}
void tree_sitter_jai_external_scanner_destroy(void *payload) {
Scanner *scanner = (Scanner *)payload;
for (size_t i = 0; i < scanner->heredocs.size; i++) {
array_delete(&scanner->heredocs.contents[i]);
}
array_delete(&scanner->heredocs);
ts_free(scanner);
}
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,13 @@
#include "scanner.h"
// The TypeScript scanner keeps no state between calls, so the lifecycle hooks
// are all no-ops: there is no payload, and nothing to serialize or restore.
void *tree_sitter_typescript_external_scanner_create() {
    return NULL;
}

void tree_sitter_typescript_external_scanner_destroy(void *payload) {
    (void)payload;
}

unsigned tree_sitter_typescript_external_scanner_serialize(void *payload, char *buffer) {
    (void)payload;
    (void)buffer;
    return 0;
}

void tree_sitter_typescript_external_scanner_deserialize(void *payload, const char *buffer, unsigned length) {
    (void)payload;
    (void)buffer;
    (void)length;
}
// Scan hook for the TypeScript grammar; forwards to the shared
// external_scanner_scan() implementation from scanner.h.
bool tree_sitter_typescript_external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
    const bool recognized = external_scanner_scan(payload, lexer, valid_symbols);
    return recognized;
}

View File

@ -0,0 +1,347 @@
#include "./tree_sitter/parser.h"
#include <wctype.h>
// External token kinds. The values index the valid_symbols array; the scan
// helpers below write AUTOMATIC_SEMICOLON, TEMPLATE_CHARS, TERNARY_QMARK,
// HTML_COMMENT and JSX_TEXT to lexer->result_symbol. The remaining entries are
// only consulted through valid_symbols (or not at all) in this file.
enum TokenType {
AUTOMATIC_SEMICOLON,
TEMPLATE_CHARS,
TERNARY_QMARK,
HTML_COMMENT,
LOGICAL_OR,
ESCAPE_SEQUENCE,
REGEX_PATTERN,
JSX_TEXT,
FUNCTION_SIGNATURE_AUTOMATIC_SEMICOLON,
ERROR_RECOVERY,
};
// Consume the lookahead character as part of the current token.
static void advance(TSLexer *lexer) {
    lexer->advance(lexer, false);
}

// Consume the lookahead character as skipped input.
static void skip(TSLexer *lexer) {
    lexer->advance(lexer, true);
}
// Scans literal text inside a template string, stopping before a backtick, a
// backslash, or a "${" sequence. Returns true only when at least one character
// was consumed; returns false on a NUL lookahead.
static bool scan_template_chars(TSLexer *lexer) {
    lexer->result_symbol = TEMPLATE_CHARS;

    bool has_content = false;
    for (;;) {
        lexer->mark_end(lexer);

        const int32_t current = lexer->lookahead;
        if (current == '\0') {
            return false;
        }
        if (current == '`' || current == '\\') {
            return has_content;
        }

        advance(lexer);
        // A '$' only ends the token when it introduces a substitution.
        if (current == '$' && lexer->lookahead == '{') {
            return has_content;
        }
        has_content = true;
    }
}
// Skips whitespace, line comments and block comments. Returns true when the
// lexer stops on something other than '/', and false when a '/' turns out not
// to start a comment. *scanned_comment is set to true after a line comment.
static bool scan_whitespace_and_comments(TSLexer *lexer, bool *scanned_comment) {
    for (;;) {
        while (iswspace(lexer->lookahead)) {
            skip(lexer);
        }

        if (lexer->lookahead != '/') {
            return true;
        }
        skip(lexer);

        if (lexer->lookahead == '/') {
            // Line comment: skip to the end of the line.
            skip(lexer);
            while (lexer->lookahead != 0 && lexer->lookahead != '\n') {
                skip(lexer);
            }
            *scanned_comment = true;
            continue;
        }

        if (lexer->lookahead != '*') {
            return false;
        }

        // Block comment: skip until the closing "*/" or end of input.
        skip(lexer);
        while (lexer->lookahead != 0) {
            const bool was_star = lexer->lookahead == '*';
            skip(lexer);
            if (was_star && lexer->lookahead == '/') {
                skip(lexer);
                break;
            }
        }
    }
}
// Decides whether an automatic semicolon token should be produced at the
// current position. The token itself is empty: mark_end is called before any
// character is consumed and everything afterwards is only looked at via skip().
//
// lexer           - lexer positioned where a semicolon might be inserted.
// valid_symbols   - consulted for LOGICAL_OR and
//                   FUNCTION_SIGNATURE_AUTOMATIC_SEMICOLON.
// scanned_comment - forwarded to scan_whitespace_and_comments, which sets it
//                   to true after a line comment.
// Returns true when a semicolon should be inserted.
static bool scan_automatic_semicolon(TSLexer *lexer, const bool *valid_symbols, bool *scanned_comment) {
lexer->result_symbol = AUTOMATIC_SEMICOLON;
lexer->mark_end(lexer);
// Phase 1: look for end of input, a closing brace, or a newline on this line.
for (;;) {
if (lexer->lookahead == 0) {
return true;
}
if (lexer->lookahead == '}') {
// Automatic semicolon insertion breaks detection of object patterns
// in a typed context:
// type F = ({a}: {a: number}) => number;
// Therefore, disable automatic semicolons when followed by typing
do {
skip(lexer);
} while (iswspace(lexer->lookahead));
if (lexer->lookahead == ':') {
return valid_symbols[LOGICAL_OR]; // Don't return false if we're in a ternary by checking if || is valid
}
return true;
}
// Any other non-whitespace character on the same line: no semicolon.
if (!iswspace(lexer->lookahead)) {
return false;
}
if (lexer->lookahead == '\n') {
break;
}
skip(lexer);
}
// Phase 2: a newline was found; step past it and past any following
// whitespace/comments, then decide from the first character of the next line.
skip(lexer);
if (!scan_whitespace_and_comments(lexer, scanned_comment)) {
return false;
}
switch (lexer->lookahead) {
// These characters continue the previous line, so no semicolon is inserted.
case '`':
case ',':
case '.':
case ';':
case '*':
case '%':
case '>':
case '<':
case '=':
case '?':
case '^':
case '|':
case '&':
case '/':
case ':':
return false;
case '{':
if (valid_symbols[FUNCTION_SIGNATURE_AUTOMATIC_SEMICOLON]) {
return false;
}
break;
// Don't insert a semicolon before a '[' or '(', unless we're parsing
// a type. Detect whether we're parsing a type or an expression using
// the validity of a binary operator token.
case '(':
case '[':
if (valid_symbols[LOGICAL_OR]) {
return false;
}
break;
// Insert a semicolon before `--` and `++`, but not before binary `+` or `-`.
case '+':
skip(lexer);
return lexer->lookahead == '+';
case '-':
skip(lexer);
return lexer->lookahead == '-';
// Don't insert a semicolon before `!=`, but do insert one before a unary `!`.
case '!':
skip(lexer);
return lexer->lookahead != '=';
// Don't insert a semicolon before `in` or `instanceof`, but do insert one
// before an identifier.
case 'i':
skip(lexer);
if (lexer->lookahead != 'n') {
return true;
}
skip(lexer);
// "in" followed by a non-letter is the `in` operator.
if (!iswalpha(lexer->lookahead)) {
return false;
}
// Otherwise the word must spell out "instanceof" exactly to count as the operator.
for (unsigned i = 0; i < 8; i++) {
if (lexer->lookahead != "stanceof"[i]) {
return true;
}
skip(lexer);
}
if (!iswalpha(lexer->lookahead)) {
return false;
}
break;
}
return true;
}
// Recognizes the '?' of a ternary expression. Returns false for "??", "?.",
// and for a '?' followed (after whitespace) by ':', ')' or ',' — the
// TypeScript optional-parameter forms. A '?' followed by '.' and a digit is
// still accepted as a ternary.
static bool scan_ternary_qmark(TSLexer *lexer) {
    while (iswspace(lexer->lookahead)) {
        skip(lexer);
    }

    if (lexer->lookahead != '?') {
        return false;
    }
    advance(lexer);

    /* Optional chaining. */
    if (lexer->lookahead == '?' || lexer->lookahead == '.') {
        return false;
    }

    // The token is just the '?'; everything after this is lookahead only.
    lexer->mark_end(lexer);
    lexer->result_symbol = TERNARY_QMARK;

    /* TypeScript optional arguments contain the ?: sequence, possibly
       with whitespace. */
    while (iswspace(lexer->lookahead)) {
        advance(lexer);
    }

    switch (lexer->lookahead) {
        case ':':
        case ')':
        case ',':
            return false;
        case '.':
            advance(lexer);
            return iswdigit(lexer->lookahead) != 0;
        default:
            return true;
    }
}
// Recognizes an HTML-style comment: after optional whitespace, either "<!--"
// or "-->" followed by the rest of the line (up to '\n', U+2028, U+2029 or
// end of input). Returns true and sets HTML_COMMENT when one was scanned.
static bool scan_closing_comment(TSLexer *lexer) {
    while (iswspace(lexer->lookahead) || lexer->lookahead == 0x2028 || lexer->lookahead == 0x2029) {
        skip(lexer);
    }

    // Pick the marker to match from the first character.
    const char *marker;
    if (lexer->lookahead == '<') {
        marker = "<!--";
    } else if (lexer->lookahead == '-') {
        marker = "-->";
    } else {
        return false;
    }

    for (; *marker != '\0'; marker++) {
        if (lexer->lookahead != *marker) {
            return false;
        }
        advance(lexer);
    }

    for (;;) {
        const int32_t c = lexer->lookahead;
        if (c == 0 || c == '\n' || c == 0x2028 || c == 0x2029) {
            break;
        }
        advance(lexer);
    }

    lexer->result_symbol = HTML_COMMENT;
    lexer->mark_end(lexer);
    return true;
}
// Consumes JSX text up to the next '<', '>', '{', '}', '&' or end of input.
// Returns true when the run contained real text: any non-whitespace
// character, or whitespace that is neither a newline nor part of the
// whitespace directly following a newline.
static bool scan_jsx_text(TSLexer *lexer) {
    bool saw_text = false;
    // True while we are on a newline or on whitespace that directly follows one.
    bool at_newline = false;

    for (;;) {
        const int32_t c = lexer->lookahead;
        if (c == 0 || c == '<' || c == '>' || c == '{' || c == '}' || c == '&') {
            break;
        }

        if (c == '\n') {
            at_newline = true;
        } else {
            // Whitespace keeps an existing "at newline" state alive; anything
            // else ends it. Once we are not at a newline, this character
            // counts as text.
            at_newline = at_newline && iswspace(c);
            if (!at_newline) {
                saw_text = true;
            }
        }

        advance(lexer);
    }

    lexer->result_symbol = JSX_TEXT;
    return saw_text;
}
// Shared scan entry point: picks the token scanner to run from the set of
// currently valid external symbols. `payload` is not used.
static inline bool external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
    if (valid_symbols[TEMPLATE_CHARS]) {
        // Template characters and automatic semicolons valid together: scan nothing.
        return valid_symbols[AUTOMATIC_SEMICOLON] ? false : scan_template_chars(lexer);
    }

    if (valid_symbols[JSX_TEXT]) {
        if (scan_jsx_text(lexer)) {
            return true;
        }
    }

    if (valid_symbols[AUTOMATIC_SEMICOLON] || valid_symbols[FUNCTION_SIGNATURE_AUTOMATIC_SEMICOLON]) {
        bool scanned_comment = false;
        if (scan_automatic_semicolon(lexer, valid_symbols, &scanned_comment)) {
            return true;
        }
        // No semicolon: the '?' we stopped on may still be a ternary operator.
        const bool try_qmark = !scanned_comment && valid_symbols[TERNARY_QMARK] && lexer->lookahead == '?';
        return try_qmark ? scan_ternary_qmark(lexer) : false;
    }

    if (valid_symbols[TERNARY_QMARK]) {
        return scan_ternary_qmark(lexer);
    }

    const bool only_comment_expected = valid_symbols[HTML_COMMENT] && !valid_symbols[LOGICAL_OR] &&
                                       !valid_symbols[ESCAPE_SEQUENCE] && !valid_symbols[REGEX_PATTERN];
    if (only_comment_expected) {
        return scan_closing_comment(lexer);
    }

    return false;
}

View File

@ -0,0 +1,54 @@
#ifndef TREE_SITTER_ALLOC_H_
#define TREE_SITTER_ALLOC_H_
#ifdef __cplusplus
extern "C" {
#endif
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
// Allow clients to override allocation functions
//
// ts_malloc / ts_calloc / ts_realloc / ts_free are macros. Each one is wrapped
// in #ifndef, so a definition made before this header is included wins.
// With TREE_SITTER_REUSE_ALLOCATOR defined they resolve to the extern function
// pointers ts_current_*; otherwise they resolve to the C library functions.
#ifdef TREE_SITTER_REUSE_ALLOCATOR
// Function pointers defined elsewhere; this header only declares them.
extern void *(*ts_current_malloc)(size_t size);
extern void *(*ts_current_calloc)(size_t count, size_t size);
extern void *(*ts_current_realloc)(void *ptr, size_t size);
extern void (*ts_current_free)(void *ptr);
#ifndef ts_malloc
#define ts_malloc ts_current_malloc
#endif
#ifndef ts_calloc
#define ts_calloc ts_current_calloc
#endif
#ifndef ts_realloc
#define ts_realloc ts_current_realloc
#endif
#ifndef ts_free
#define ts_free ts_current_free
#endif
#else
// Default: plain malloc/calloc/realloc/free from <stdlib.h>.
#ifndef ts_malloc
#define ts_malloc malloc
#endif
#ifndef ts_calloc
#define ts_calloc calloc
#endif
#ifndef ts_realloc
#define ts_realloc realloc
#endif
#ifndef ts_free
#define ts_free free
#endif
#endif
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_ALLOC_H_

View File

@ -0,0 +1,290 @@
#ifndef TREE_SITTER_ARRAY_H_
#define TREE_SITTER_ARRAY_H_
#ifdef __cplusplus
extern "C" {
#endif
#include "./alloc.h"
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#ifdef _MSC_VER
#pragma warning(disable : 4101)
#elif defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-variable"
#endif
/// Declare an anonymous growable-array struct holding elements of type `T`:
/// a `contents` pointer plus 32-bit `size` and `capacity` counters.
/// Every macro below takes a pointer to such a struct as `self`.
#define Array(T) \
struct { \
T *contents; \
uint32_t size; \
uint32_t capacity; \
}
/// Initialize an array.
#define array_init(self) \
((self)->size = 0, (self)->capacity = 0, (self)->contents = NULL)
/// Create an empty array.
/// Expands to a brace initializer, so it is meant for use in a declaration.
#define array_new() \
{ NULL, 0, 0 }
/// Get a pointer to the element at a given `index` in the array.
/// Asserts that the index is within `size`.
#define array_get(self, _index) \
(assert((uint32_t)(_index) < (self)->size), &(self)->contents[_index])
/// Get a pointer to the first element in the array.
#define array_front(self) array_get(self, 0)
/// Get a pointer to the last element in the array.
/// On an empty array the index wraps around and the assert in `array_get` fails.
#define array_back(self) array_get(self, (self)->size - 1)
/// Clear the array, setting its size to zero. Note that this does not free any
/// memory allocated for the array's contents.
#define array_clear(self) ((self)->size = 0)
/// Reserve `new_capacity` elements of space in the array. If `new_capacity` is
/// less than or equal to the array's current capacity, this has no effect.
#define array_reserve(self, new_capacity) \
_array__reserve((Array *)(self), array_elem_size(self), new_capacity)
/// Free any memory allocated for this array. Note that this does not free any
/// memory allocated for the array's contents.
#define array_delete(self) _array__delete((Array *)(self))
/// Push a new `element` onto the end of the array.
#define array_push(self, element) \
(_array__grow((Array *)(self), 1, array_elem_size(self)), \
(self)->contents[(self)->size++] = (element))
/// Increase the array's size by `count` elements.
/// New elements are zero-initialized.
#define array_grow_by(self, count) \
do { \
if ((count) == 0) break; \
_array__grow((Array *)(self), count, array_elem_size(self)); \
memset((self)->contents + (self)->size, 0, (count) * array_elem_size(self)); \
(self)->size += (count); \
} while (0)
/// Append all elements from one array to the end of another.
#define array_push_all(self, other) \
array_extend((self), (other)->size, (other)->contents)
/// Append `count` elements to the end of the array, reading their values from the
/// `contents` pointer.
#define array_extend(self, count, contents) \
_array__splice( \
(Array *)(self), array_elem_size(self), (self)->size, \
0, count, contents \
)
/// Remove `old_count` elements from the array starting at the given `index`. At
/// the same index, insert `new_count` new elements, reading their values from the
/// `new_contents` pointer.
#define array_splice(self, _index, old_count, new_count, new_contents) \
_array__splice( \
(Array *)(self), array_elem_size(self), _index, \
old_count, new_count, new_contents \
)
/// Insert one `element` into the array at the given `index`.
/// `element` must be an lvalue: its address is taken.
#define array_insert(self, _index, element) \
_array__splice((Array *)(self), array_elem_size(self), _index, 0, 1, &(element))
/// Remove one element from the array at the given `index`.
#define array_erase(self, _index) \
_array__erase((Array *)(self), array_elem_size(self), _index)
/// Pop the last element off the array, returning the element by value.
/// There is no emptiness check: the caller must ensure `size > 0`.
#define array_pop(self) ((self)->contents[--(self)->size])
/// Assign the contents of one array to another, reallocating if necessary.
#define array_assign(self, other) \
_array__assign((Array *)(self), (const Array *)(other), array_elem_size(self))
/// Swap one array with another
#define array_swap(self, other) \
_array__swap((Array *)(self), (Array *)(other))
/// Get the size in bytes of one element of the array.
#define array_elem_size(self) (sizeof *(self)->contents)
/// Search a sorted array for a given `needle` value, using the given `compare`
/// callback to determine the order.
///
/// If an existing element is found to be equal to `needle`, then the `index`
/// out-parameter is set to the existing value's index, and the `exists`
/// out-parameter is set to true. Otherwise, `index` is set to an index where
/// `needle` should be inserted in order to preserve the sorting, and `exists`
/// is set to false.
///
/// The empty macro argument is the "field" slot of `_array__search_sorted`:
/// here whole elements are compared rather than one of their fields.
#define array_search_sorted_with(self, compare, needle, _index, _exists) \
_array__search_sorted(self, 0, compare, , needle, _index, _exists)
/// Search a sorted array for a given `needle` value, using integer comparisons
/// of a given struct field (specified with a leading dot) to determine the order.
///
/// See also `array_search_sorted_with`.
#define array_search_sorted_by(self, field, needle, _index, _exists) \
_array__search_sorted(self, 0, _compare_int, field, needle, _index, _exists)
/// Insert a given `value` into a sorted array, using the given `compare`
/// callback to determine the order.
/// Nothing is inserted when the search reports that the value already exists.
#define array_insert_sorted_with(self, compare, value) \
do { \
unsigned _index, _exists; \
array_search_sorted_with(self, compare, &(value), &_index, &_exists); \
if (!_exists) array_insert(self, _index, value); \
} while (0)
/// Insert a given `value` into a sorted array, using integer comparisons of
/// a given struct field (specified with a leading dot) to determine the order.
/// Nothing is inserted when the search reports that the value already exists.
///
/// See also `array_search_sorted_by`.
#define array_insert_sorted_by(self, field, value) \
do { \
unsigned _index, _exists; \
array_search_sorted_by(self, field, (value) field, &_index, &_exists); \
if (!_exists) array_insert(self, _index, value); \
} while (0)
// Private
typedef Array(void) Array;
/// This is not what you're looking for, see `array_delete`.
/// Frees the contents buffer and resets the array to empty; an array without
/// a buffer is left untouched.
static inline void _array__delete(Array *self) {
    if (!self->contents) {
        return;
    }
    ts_free(self->contents);
    self->contents = NULL;
    self->capacity = 0;
    self->size = 0;
}
/// This is not what you're looking for, see `array_erase`.
/// Removes the element at `index` by shifting the tail down one slot.
static inline void _array__erase(Array *self, size_t element_size,
                                 uint32_t index) {
    assert(index < self->size);

    char *base = (char *)self->contents;
    char *slot = base + index * element_size;
    const size_t tail_bytes = (self->size - index - 1) * element_size;

    memmove(slot, slot + element_size, tail_bytes);
    self->size--;
}
/// This is not what you're looking for, see `array_reserve`.
/// Grows the buffer to hold `new_capacity` elements; never shrinks it.
static inline void _array__reserve(Array *self, size_t element_size, uint32_t new_capacity) {
    if (new_capacity <= self->capacity) {
        return;
    }

    const size_t byte_count = new_capacity * element_size;
    // First allocation uses malloc; later growth keeps the existing elements.
    self->contents = self->contents
        ? ts_realloc(self->contents, byte_count)
        : ts_malloc(byte_count);
    self->capacity = new_capacity;
}
/// This is not what you're looking for, see `array_assign`.
///
/// Makes `self` hold a copy of the elements of `other`. Storage in `self` is
/// grown when needed (never shrunk), `size` is set to `other->size`, and the
/// element bytes are copied over. `element_size` is the byte size of one
/// element.
static inline void _array__assign(Array *self, const Array *other, size_t element_size) {
  _array__reserve(self, element_size, other->size);
  self->size = other->size;
  // When `other` is empty, both `contents` pointers may still be NULL (nothing
  // was ever allocated). Passing NULL to memcpy is undefined behaviour even
  // for a zero-byte copy, so skip the call entirely in that case.
  if (self->size > 0) {
    memcpy(self->contents, other->contents, self->size * element_size);
  }
}
/// Backing implementation of `array_swap`; use that macro instead.
///
/// Exchanges the three header fields (contents pointer, size, capacity) of
/// the two arrays. No element data is copied.
static inline void _array__swap(Array *self, Array *other) {
  Array held = *self;
  *self = *other;
  *other = held;
}
/// Backing implementation of `array_push` / `array_grow_by`; use those macros.
///
/// Ensures there is capacity for `count` more elements beyond the current
/// size. When the array is too small, the capacity is doubled, with a floor of
/// 8 elements, and raised further if doubling is still not enough. `size`
/// itself is not modified here.
static inline void _array__grow(Array *self, uint32_t count, size_t element_size) {
  uint32_t needed = self->size + count;
  if (needed <= self->capacity) return;
  uint32_t target = self->capacity * 2;
  if (target < 8) target = 8;
  if (target < needed) target = needed;
  _array__reserve(self, element_size, target);
}
/// This is not what you're looking for, see `array_splice`.
///
/// Replaces the `old_count` elements starting at `index` with `new_count`
/// elements. The new elements are copied from `elements`, or zero-filled when
/// `elements` is NULL. `element_size` is the byte size of a single element.
/// The removed range must lie inside the array (`index + old_count <= size`).
static inline void _array__splice(Array *self, size_t element_size,
uint32_t index, uint32_t old_count,
uint32_t new_count, const void *elements) {
uint32_t new_size = self->size + new_count - old_count;
uint32_t old_end = index + old_count;
uint32_t new_end = index + new_count;
assert(old_end <= self->size);
// Grows the allocation if required; a shrinking splice keeps the capacity.
_array__reserve(self, element_size, new_size);
char *contents = (char *)self->contents;
// Move the tail (everything after the removed range) to its final position
// first. The regions may overlap, hence memmove.
if (self->size > old_end) {
memmove(
contents + new_end * element_size,
contents + old_end * element_size,
(self->size - old_end) * element_size
);
}
// Then fill the gap at `index` with the replacement elements.
if (new_count > 0) {
if (elements) {
memcpy(
(contents + index * element_size),
elements,
new_count * element_size
);
} else {
// No source buffer supplied: the inserted slots are zero-initialized.
memset(
(contents + index * element_size),
0,
new_count * element_size
);
}
}
self->size += new_count - old_count;
}
/// A binary search routine, based on Rust's `std::slice::binary_search_by`.
/// This is not what you're looking for, see `array_search_sorted_with` or `array_search_sorted_by`.
///
/// Searches the elements of `self` from index `start` up to `(self)->size`.
/// Each probe evaluates `compare(&element suffix, needle)`, where `suffix` is
/// either empty or a `.field` accessor appended to the element expression.
/// A result `<= 0` moves the lower bound up to the probed index.
///
/// On exit `*(_exists)` is true and `*(_index)` is the matching index when the
/// final probe compares equal. Otherwise `*(_exists)` is false and `*(_index)`
/// is the insertion position. For an empty search range `*(_index)` is left at
/// `start`.
///
/// The expansion declares the locals `size`, `comparison`, `half_size` and
/// `mid_index` inside its `do { } while (0)` scope, so argument expressions
/// must not rely on outer variables with those names.
#define _array__search_sorted(self, start, compare, suffix, needle, _index, _exists) \
do { \
*(_index) = start; \
*(_exists) = false; \
uint32_t size = (self)->size - *(_index); \
if (size == 0) break; \
int comparison; \
while (size > 1) { \
uint32_t half_size = size / 2; \
uint32_t mid_index = *(_index) + half_size; \
comparison = compare(&((self)->contents[mid_index] suffix), (needle)); \
if (comparison <= 0) *(_index) = mid_index; \
size -= half_size; \
} \
comparison = compare(&((self)->contents[*(_index)] suffix), (needle)); \
if (comparison == 0) *(_exists) = true; \
else if (comparison < 0) *(_index) += 1; \
} while (0)
/// Helper macro for the `_sorted_by` routines above. This takes the left (existing)
/// parameter by reference in order to work with the generic search macro
/// `_array__search_sorted`, which passes the address of the stored element.
/// Both operands are converted to `int` and subtracted, so the sign of the
/// result gives the ordering (negative: `*a` sorts before `b`).
#define _compare_int(a, b) ((int)*(a) - (int)(b))
#ifdef _MSC_VER
#pragma warning(default : 4101)
#elif defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic pop
#endif
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_ARRAY_H_

View File

@ -0,0 +1,266 @@
#ifndef TREE_SITTER_PARSER_H_
#define TREE_SITTER_PARSER_H_
#ifdef __cplusplus
extern "C" {
#endif
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#define ts_builtin_sym_error ((TSSymbol)-1)
#define ts_builtin_sym_end 0
#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024
#ifndef TREE_SITTER_API_H_
typedef uint16_t TSStateId;
typedef uint16_t TSSymbol;
typedef uint16_t TSFieldId;
typedef struct TSLanguage TSLanguage;
#endif
typedef struct {
TSFieldId field_id;
uint8_t child_index;
bool inherited;
} TSFieldMapEntry;
typedef struct {
uint16_t index;
uint16_t length;
} TSFieldMapSlice;
typedef struct {
bool visible;
bool named;
bool supertype;
} TSSymbolMetadata;
/// Lexer interface handed to the lex functions and to the external scanner's
/// `scan` callback declared in `TSLanguage`. It carries the current lookahead
/// value plus a set of callbacks.
typedef struct TSLexer TSLexer;
struct TSLexer {
// Current lookahead value; START_LEXER copies it into its local `lookahead`.
int32_t lookahead;
// Symbol of the recognized token; assigned by ACCEPT_TOKEN.
TSSymbol result_symbol;
// Invoked by START_LEXER as `advance(lexer, skip)` on each state transition.
void (*advance)(TSLexer *, bool);
// Invoked by ACCEPT_TOKEN right after `result_symbol` has been set.
void (*mark_end)(TSLexer *);
// NOTE(review): the callbacks below are not used anywhere in this header;
// their behaviour is presumably what the names suggest — confirm against
// the runtime that fills in this struct.
uint32_t (*get_column)(TSLexer *);
bool (*is_at_included_range_start)(const TSLexer *);
bool (*eof)(const TSLexer *);
void (*log)(const TSLexer *, const char *, ...);
};
typedef enum {
TSParseActionTypeShift,
TSParseActionTypeReduce,
TSParseActionTypeAccept,
TSParseActionTypeRecover,
} TSParseActionType;
typedef union {
struct {
uint8_t type;
TSStateId state;
bool extra;
bool repetition;
} shift;
struct {
uint8_t type;
uint8_t child_count;
TSSymbol symbol;
int16_t dynamic_precedence;
uint16_t production_id;
} reduce;
uint8_t type;
} TSParseAction;
typedef struct {
uint16_t lex_state;
uint16_t external_lex_state;
} TSLexMode;
typedef union {
TSParseAction action;
struct {
uint8_t count;
bool reusable;
} entry;
} TSParseActionEntry;
typedef struct {
int32_t start;
int32_t end;
} TSCharacterRange;
struct TSLanguage {
uint32_t version;
uint32_t symbol_count;
uint32_t alias_count;
uint32_t token_count;
uint32_t external_token_count;
uint32_t state_count;
uint32_t large_state_count;
uint32_t production_id_count;
uint32_t field_count;
uint16_t max_alias_sequence_length;
const uint16_t *parse_table;
const uint16_t *small_parse_table;
const uint32_t *small_parse_table_map;
const TSParseActionEntry *parse_actions;
const char * const *symbol_names;
const char * const *field_names;
const TSFieldMapSlice *field_map_slices;
const TSFieldMapEntry *field_map_entries;
const TSSymbolMetadata *symbol_metadata;
const TSSymbol *public_symbol_map;
const uint16_t *alias_map;
const TSSymbol *alias_sequences;
const TSLexMode *lex_modes;
bool (*lex_fn)(TSLexer *, TSStateId);
bool (*keyword_lex_fn)(TSLexer *, TSStateId);
TSSymbol keyword_capture_token;
struct {
const bool *states;
const TSSymbol *symbol_map;
void *(*create)(void);
void (*destroy)(void *);
bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist);
unsigned (*serialize)(void *, char *);
void (*deserialize)(void *, const char *, unsigned);
} external_scanner;
const TSStateId *primary_state_ids;
};
/// Report whether `lookahead` lies inside any of the `len` inclusive
/// `[start, end]` ranges stored in `ranges`.
///
/// The table is probed by repeatedly halving the search window, returning
/// early as soon as a probed range contains `lookahead`.
/// NOTE(review): this presumably relies on `ranges` being sorted ascending and
/// non-overlapping — confirm against the code that generates the tables.
///
/// Returns false for an empty table (`len == 0`).
static inline bool set_contains(TSCharacterRange *ranges, uint32_t len, int32_t lookahead) {
  // An empty set contains nothing. Without this guard the final probe below
  // would read ranges[0] from a zero-length table.
  if (len == 0) return false;
  uint32_t index = 0;
  uint32_t size = len - index;
  while (size > 1) {
    uint32_t half_size = size / 2;
    uint32_t mid_index = index + half_size;
    TSCharacterRange *range = &ranges[mid_index];
    if (lookahead >= range->start && lookahead <= range->end) {
      return true;
    } else if (lookahead > range->end) {
      // Target lies above the probed range: raise the lower bound.
      index = mid_index;
    }
    size -= half_size;
  }
  // One candidate range is left; check it directly.
  TSCharacterRange *range = &ranges[index];
  return (lookahead >= range->start && lookahead <= range->end);
}
/*
* Lexer Macros
*/
#ifdef _MSC_VER
#define UNUSED __pragma(warning(suppress : 4101))
#else
#define UNUSED __attribute__((unused))
#endif
// Prologue of a lex function body. Expects a `TSLexer *lexer` to be in scope.
//
// Declares the locals `result`, `skip`, `eof` (marked UNUSED) and `lookahead`,
// and defines two labels:
//   - `next_state`: calls `lexer->advance(lexer, skip)` and falls through to
//     `start`; this is the jump target of ADVANCE, ADVANCE_MAP and SKIP.
//   - `start`: clears `skip` and reloads `lookahead` from `lexer->lookahead`.
// The initial `goto start` skips the advance on first entry.
#define START_LEXER() \
bool result = false; \
bool skip = false; \
UNUSED \
bool eof = false; \
int32_t lookahead; \
goto start; \
next_state: \
lexer->advance(lexer, skip); \
start: \
skip = false; \
lookahead = lexer->lookahead;
#define ADVANCE(state_value) \
{ \
state = state_value; \
goto next_state; \
}
// Table-driven form of ADVANCE. The arguments are a flat list of
// (character, next state) pairs stored as `uint16_t`. The pairs are scanned in
// order; on the first pair whose character equals `lookahead`, `state` is set
// to the paired value and control jumps to `next_state`. If no pair matches,
// execution continues after the macro.
#define ADVANCE_MAP(...) \
{ \
static const uint16_t map[] = { __VA_ARGS__ }; \
for (uint32_t i = 0; i < sizeof(map) / sizeof(map[0]); i += 2) { \
if (map[i] == lookahead) { \
state = map[i + 1]; \
goto next_state; \
} \
} \
}
#define SKIP(state_value) \
{ \
skip = true; \
state = state_value; \
goto next_state; \
}
#define ACCEPT_TOKEN(symbol_value) \
result = true; \
lexer->result_symbol = symbol_value; \
lexer->mark_end(lexer);
#define END_STATE() return result;
/*
* Parse Table Macros
*/
#define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT)
#define STATE(id) id
#define ACTIONS(id) id
#define SHIFT(state_value) \
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.state = (state_value) \
} \
}}
#define SHIFT_REPEAT(state_value) \
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.state = (state_value), \
.repetition = true \
} \
}}
#define SHIFT_EXTRA() \
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.extra = true \
} \
}}
#define REDUCE(symbol_name, children, precedence, prod_id) \
{{ \
.reduce = { \
.type = TSParseActionTypeReduce, \
.symbol = symbol_name, \
.child_count = children, \
.dynamic_precedence = precedence, \
.production_id = prod_id \
}, \
}}
#define RECOVER() \
{{ \
.type = TSParseActionTypeRecover \
}}
#define ACCEPT_INPUT() \
{{ \
.type = TSParseActionTypeAccept \
}}
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_PARSER_H_

View File

@ -0,0 +1,41 @@
#ifndef TREE_SITTER_ALLOC_H_
#define TREE_SITTER_ALLOC_H_
#ifdef __cplusplus
extern "C" {
#endif
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#if defined(TREE_SITTER_HIDDEN_SYMBOLS) || defined(_WIN32)
#define TS_PUBLIC
#else
#define TS_PUBLIC __attribute__((visibility("default")))
#endif
TS_PUBLIC extern void *(*ts_current_malloc)(size_t size);
TS_PUBLIC extern void *(*ts_current_calloc)(size_t count, size_t size);
TS_PUBLIC extern void *(*ts_current_realloc)(void *ptr, size_t size);
TS_PUBLIC extern void (*ts_current_free)(void *ptr);
// Allow clients to override allocation functions
#ifndef ts_malloc
#define ts_malloc ts_current_malloc
#endif
#ifndef ts_calloc
#define ts_calloc ts_current_calloc
#endif
#ifndef ts_realloc
#define ts_realloc ts_current_realloc
#endif
#ifndef ts_free
#define ts_free ts_current_free
#endif
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_ALLOC_H_

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,291 @@
#ifndef TREE_SITTER_ARRAY_H_
#define TREE_SITTER_ARRAY_H_
#ifdef __cplusplus
extern "C" {
#endif
#include "./alloc.h"
#include "./ts_assert.h"
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable : 4101)
#elif defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-variable"
#endif
#define Array(T) \
struct { \
T *contents; \
uint32_t size; \
uint32_t capacity; \
}
/// Initialize an array.
#define array_init(self) \
((self)->size = 0, (self)->capacity = 0, (self)->contents = NULL)
/// Create an empty array.
#define array_new() \
{ NULL, 0, 0 }
/// Get a pointer to the element at a given `index` in the array.
#define array_get(self, _index) \
(ts_assert((uint32_t)(_index) < (self)->size), &(self)->contents[_index])
/// Get a pointer to the first element in the array.
#define array_front(self) array_get(self, 0)
/// Get a pointer to the last element in the array.
#define array_back(self) array_get(self, (self)->size - 1)
/// Clear the array, setting its size to zero. Note that this does not free any
/// memory allocated for the array's contents.
#define array_clear(self) ((self)->size = 0)
/// Reserve `new_capacity` elements of space in the array. If `new_capacity` is
/// less than the array's current capacity, this function has no effect.
#define array_reserve(self, new_capacity) \
_array__reserve((Array *)(self), array_elem_size(self), new_capacity)
/// Free the storage buffer owned by this array and reset it to the empty
/// state. Note that this does not free any memory that the individual
/// elements themselves may point to.
#define array_delete(self) _array__delete((Array *)(self))
/// Push a new `element` onto the end of the array.
#define array_push(self, element) \
(_array__grow((Array *)(self), 1, array_elem_size(self)), \
(self)->contents[(self)->size++] = (element))
/// Increase the array's size by `count` elements.
/// New elements are zero-initialized.
#define array_grow_by(self, count) \
do { \
if ((count) == 0) break; \
_array__grow((Array *)(self), count, array_elem_size(self)); \
memset((self)->contents + (self)->size, 0, (count) * array_elem_size(self)); \
(self)->size += (count); \
} while (0)
/// Append all elements from one array to the end of another.
#define array_push_all(self, other) \
array_extend((self), (other)->size, (other)->contents)
/// Append `count` elements to the end of the array, reading their values from the
/// `contents` pointer.
#define array_extend(self, count, contents) \
_array__splice( \
(Array *)(self), array_elem_size(self), (self)->size, \
0, count, contents \
)
/// Remove `old_count` elements from the array starting at the given `index`. At
/// the same index, insert `new_count` new elements, reading their values from the
/// `new_contents` pointer.
#define array_splice(self, _index, old_count, new_count, new_contents) \
_array__splice( \
(Array *)(self), array_elem_size(self), _index, \
old_count, new_count, new_contents \
)
/// Insert one `element` into the array at the given `index`.
#define array_insert(self, _index, element) \
_array__splice((Array *)(self), array_elem_size(self), _index, 0, 1, &(element))
/// Remove one element from the array at the given `index`.
#define array_erase(self, _index) \
_array__erase((Array *)(self), array_elem_size(self), _index)
/// Pop the last element off the array, returning the element by value.
#define array_pop(self) ((self)->contents[--(self)->size])
/// Assign the contents of one array to another, reallocating if necessary.
#define array_assign(self, other) \
_array__assign((Array *)(self), (const Array *)(other), array_elem_size(self))
/// Swap one array with another
#define array_swap(self, other) \
_array__swap((Array *)(self), (Array *)(other))
/// Get the size in bytes of a single element of the array.
#define array_elem_size(self) (sizeof *(self)->contents)
/// Search a sorted array for a given `needle` value, using the given `compare`
/// callback to determine the order.
///
/// If an existing element is found to be equal to `needle`, then the `index`
/// out-parameter is set to the existing value's index, and the `exists`
/// out-parameter is set to true. Otherwise, `index` is set to an index where
/// `needle` should be inserted in order to preserve the sorting, and `exists`
/// is set to false.
#define array_search_sorted_with(self, compare, needle, _index, _exists) \
_array__search_sorted(self, 0, compare, , needle, _index, _exists)
/// Search a sorted array for a given `needle` value, using integer comparisons
/// of a given struct field (specified with a leading dot) to determine the order.
///
/// See also `array_search_sorted_with`.
#define array_search_sorted_by(self, field, needle, _index, _exists) \
_array__search_sorted(self, 0, _compare_int, field, needle, _index, _exists)
/// Insert a given `value` into a sorted array, using the given `compare`
/// callback to determine the order.
#define array_insert_sorted_with(self, compare, value) \
do { \
unsigned _index, _exists; \
array_search_sorted_with(self, compare, &(value), &_index, &_exists); \
if (!_exists) array_insert(self, _index, value); \
} while (0)
/// Insert a given `value` into a sorted array, using integer comparisons of
/// a given struct field (specified with a leading dot) to determine the order.
///
/// See also `array_search_sorted_by`.
#define array_insert_sorted_by(self, field, value) \
do { \
unsigned _index, _exists; \
array_search_sorted_by(self, field, (value) field, &_index, &_exists); \
if (!_exists) array_insert(self, _index, value); \
} while (0)
// Private
typedef Array(void) Array;
/// Backing implementation of `array_delete`; use that macro instead.
///
/// Releases the storage block held in `self->contents` through `ts_free` and
/// resets the array to the empty state. An array whose `contents` pointer is
/// already NULL is left untouched.
static inline void _array__delete(Array *self) {
  if (!self->contents) return;
  ts_free(self->contents);
  self->capacity = 0;
  self->size = 0;
  self->contents = NULL;
}
/// Backing implementation of `array_erase`; use that macro instead.
///
/// Removes the element at `index` by sliding every later element down one
/// slot, then shrinks `size` by one. `element_size` is the byte size of a
/// single element. `index` must be smaller than the current size.
static inline void _array__erase(Array *self, size_t element_size,
                                 uint32_t index) {
  ts_assert(index < self->size);
  char *bytes = (char *)self->contents;
  char *dst = bytes + index * element_size;
  char *src = bytes + (index + 1) * element_size;
  size_t tail_bytes = (self->size - index - 1) * element_size;
  // Source and destination overlap, hence memmove rather than memcpy.
  memmove(dst, src, tail_bytes);
  self->size--;
}
/// Backing implementation of `array_reserve`; use that macro instead.
///
/// Makes sure the array can hold at least `new_capacity` elements of
/// `element_size` bytes each. A request that does not exceed the current
/// capacity is a no-op; the allocation is never shrunk here.
static inline void _array__reserve(Array *self, size_t element_size, uint32_t new_capacity) {
  if (new_capacity <= self->capacity) return;
  size_t byte_count = new_capacity * element_size;
  // First allocation goes through ts_malloc, later ones through ts_realloc.
  self->contents = self->contents
    ? ts_realloc(self->contents, byte_count)
    : ts_malloc(byte_count);
  self->capacity = new_capacity;
}
/// This is not what you're looking for, see `array_assign`.
///
/// Makes `self` hold a copy of the elements of `other`. Storage in `self` is
/// grown when needed (never shrunk), `size` is set to `other->size`, and the
/// element bytes are copied over. `element_size` is the byte size of one
/// element.
static inline void _array__assign(Array *self, const Array *other, size_t element_size) {
  _array__reserve(self, element_size, other->size);
  self->size = other->size;
  // When `other` is empty, both `contents` pointers may still be NULL (nothing
  // was ever allocated). Passing NULL to memcpy is undefined behaviour even
  // for a zero-byte copy, so skip the call entirely in that case.
  if (self->size > 0) {
    memcpy(self->contents, other->contents, self->size * element_size);
  }
}
/// Backing implementation of `array_swap`; use that macro instead.
///
/// Exchanges the three header fields (contents pointer, size, capacity) of
/// the two arrays. No element data is copied.
static inline void _array__swap(Array *self, Array *other) {
  Array held = *self;
  *self = *other;
  *other = held;
}
/// Backing implementation of `array_push` / `array_grow_by`; use those macros.
///
/// Ensures there is capacity for `count` more elements beyond the current
/// size. When the array is too small, the capacity is doubled, with a floor of
/// 8 elements, and raised further if doubling is still not enough. `size`
/// itself is not modified here.
static inline void _array__grow(Array *self, uint32_t count, size_t element_size) {
  uint32_t needed = self->size + count;
  if (needed <= self->capacity) return;
  uint32_t target = self->capacity * 2;
  if (target < 8) target = 8;
  if (target < needed) target = needed;
  _array__reserve(self, element_size, target);
}
/// This is not what you're looking for, see `array_splice`.
///
/// Replaces the `old_count` elements starting at `index` with `new_count`
/// elements. The new elements are copied from `elements`, or zero-filled when
/// `elements` is NULL. `element_size` is the byte size of a single element.
/// The removed range must lie inside the array (`index + old_count <= size`).
static inline void _array__splice(Array *self, size_t element_size,
uint32_t index, uint32_t old_count,
uint32_t new_count, const void *elements) {
uint32_t new_size = self->size + new_count - old_count;
uint32_t old_end = index + old_count;
uint32_t new_end = index + new_count;
ts_assert(old_end <= self->size);
// Grows the allocation if required; a shrinking splice keeps the capacity.
_array__reserve(self, element_size, new_size);
char *contents = (char *)self->contents;
// Move the tail (everything after the removed range) to its final position
// first. The regions may overlap, hence memmove.
if (self->size > old_end) {
memmove(
contents + new_end * element_size,
contents + old_end * element_size,
(self->size - old_end) * element_size
);
}
// Then fill the gap at `index` with the replacement elements.
if (new_count > 0) {
if (elements) {
memcpy(
(contents + index * element_size),
elements,
new_count * element_size
);
} else {
// No source buffer supplied: the inserted slots are zero-initialized.
memset(
(contents + index * element_size),
0,
new_count * element_size
);
}
}
self->size += new_count - old_count;
}
/// A binary search routine, based on Rust's `std::slice::binary_search_by`.
/// This is not what you're looking for, see `array_search_sorted_with` or `array_search_sorted_by`.
///
/// Searches the elements of `self` from index `start` up to `(self)->size`.
/// Each probe evaluates `compare(&element suffix, needle)`, where `suffix` is
/// either empty or a `.field` accessor appended to the element expression.
/// A result `<= 0` moves the lower bound up to the probed index.
///
/// On exit `*(_exists)` is true and `*(_index)` is the matching index when the
/// final probe compares equal. Otherwise `*(_exists)` is false and `*(_index)`
/// is the insertion position. For an empty search range `*(_index)` is left at
/// `start`.
///
/// The expansion declares the locals `size`, `comparison`, `half_size` and
/// `mid_index` inside its `do { } while (0)` scope, so argument expressions
/// must not rely on outer variables with those names.
#define _array__search_sorted(self, start, compare, suffix, needle, _index, _exists) \
do { \
*(_index) = start; \
*(_exists) = false; \
uint32_t size = (self)->size - *(_index); \
if (size == 0) break; \
int comparison; \
while (size > 1) { \
uint32_t half_size = size / 2; \
uint32_t mid_index = *(_index) + half_size; \
comparison = compare(&((self)->contents[mid_index] suffix), (needle)); \
if (comparison <= 0) *(_index) = mid_index; \
size -= half_size; \
} \
comparison = compare(&((self)->contents[*(_index)] suffix), (needle)); \
if (comparison == 0) *(_exists) = true; \
else if (comparison < 0) *(_index) += 1; \
} while (0)
/// Helper macro for the `_sorted_by` routines above. This takes the left (existing)
/// parameter by reference in order to work with the generic search macro
/// `_array__search_sorted`, which passes the address of the stored element.
/// Both operands are converted to `int` and subtracted, so the sign of the
/// result gives the ordering (negative: `*a` sorts before `b`).
#define _compare_int(a, b) ((int)*(a) - (int)(b))
#ifdef _MSC_VER
#pragma warning(pop)
#elif defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic pop
#endif
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_ARRAY_H_

View File

@ -0,0 +1,286 @@
#ifndef TREE_SITTER_PARSER_H_
#define TREE_SITTER_PARSER_H_
#ifdef __cplusplus
extern "C" {
#endif
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#define ts_builtin_sym_error ((TSSymbol)-1)
#define ts_builtin_sym_end 0
#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024
#ifndef TREE_SITTER_API_H_
typedef uint16_t TSStateId;
typedef uint16_t TSSymbol;
typedef uint16_t TSFieldId;
typedef struct TSLanguage TSLanguage;
typedef struct TSLanguageMetadata {
uint8_t major_version;
uint8_t minor_version;
uint8_t patch_version;
} TSLanguageMetadata;
#endif
typedef struct {
TSFieldId field_id;
uint8_t child_index;
bool inherited;
} TSFieldMapEntry;
// Used to index the field and supertype maps.
typedef struct {
uint16_t index;
uint16_t length;
} TSMapSlice;
typedef struct {
bool visible;
bool named;
bool supertype;
} TSSymbolMetadata;
/// Lexer interface handed to the lex functions and to the external scanner's
/// `scan` callback declared in `TSLanguage`. It carries the current lookahead
/// value plus a set of callbacks.
typedef struct TSLexer TSLexer;
struct TSLexer {
// Current lookahead value; START_LEXER copies it into its local `lookahead`.
int32_t lookahead;
// Symbol of the recognized token; assigned by ACCEPT_TOKEN.
TSSymbol result_symbol;
// Invoked by START_LEXER as `advance(lexer, skip)` on each state transition.
void (*advance)(TSLexer *, bool);
// Invoked by ACCEPT_TOKEN right after `result_symbol` has been set.
void (*mark_end)(TSLexer *);
// NOTE(review): the callbacks below are not used anywhere in this header;
// their behaviour is presumably what the names suggest — confirm against
// the runtime that fills in this struct.
uint32_t (*get_column)(TSLexer *);
bool (*is_at_included_range_start)(const TSLexer *);
bool (*eof)(const TSLexer *);
void (*log)(const TSLexer *, const char *, ...);
};
typedef enum {
TSParseActionTypeShift,
TSParseActionTypeReduce,
TSParseActionTypeAccept,
TSParseActionTypeRecover,
} TSParseActionType;
typedef union {
struct {
uint8_t type;
TSStateId state;
bool extra;
bool repetition;
} shift;
struct {
uint8_t type;
uint8_t child_count;
TSSymbol symbol;
int16_t dynamic_precedence;
uint16_t production_id;
} reduce;
uint8_t type;
} TSParseAction;
typedef struct {
uint16_t lex_state;
uint16_t external_lex_state;
} TSLexMode;
typedef struct {
uint16_t lex_state;
uint16_t external_lex_state;
uint16_t reserved_word_set_id;
} TSLexerMode;
typedef union {
TSParseAction action;
struct {
uint8_t count;
bool reusable;
} entry;
} TSParseActionEntry;
typedef struct {
int32_t start;
int32_t end;
} TSCharacterRange;
struct TSLanguage {
uint32_t abi_version;
uint32_t symbol_count;
uint32_t alias_count;
uint32_t token_count;
uint32_t external_token_count;
uint32_t state_count;
uint32_t large_state_count;
uint32_t production_id_count;
uint32_t field_count;
uint16_t max_alias_sequence_length;
const uint16_t *parse_table;
const uint16_t *small_parse_table;
const uint32_t *small_parse_table_map;
const TSParseActionEntry *parse_actions;
const char * const *symbol_names;
const char * const *field_names;
const TSMapSlice *field_map_slices;
const TSFieldMapEntry *field_map_entries;
const TSSymbolMetadata *symbol_metadata;
const TSSymbol *public_symbol_map;
const uint16_t *alias_map;
const TSSymbol *alias_sequences;
const TSLexerMode *lex_modes;
bool (*lex_fn)(TSLexer *, TSStateId);
bool (*keyword_lex_fn)(TSLexer *, TSStateId);
TSSymbol keyword_capture_token;
struct {
const bool *states;
const TSSymbol *symbol_map;
void *(*create)(void);
void (*destroy)(void *);
bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist);
unsigned (*serialize)(void *, char *);
void (*deserialize)(void *, const char *, unsigned);
} external_scanner;
const TSStateId *primary_state_ids;
const char *name;
const TSSymbol *reserved_words;
uint16_t max_reserved_word_set_size;
uint32_t supertype_count;
const TSSymbol *supertype_symbols;
const TSMapSlice *supertype_map_slices;
const TSSymbol *supertype_map_entries;
TSLanguageMetadata metadata;
};
/// Report whether `lookahead` lies inside any of the `len` inclusive
/// `[start, end]` ranges stored in `ranges`.
///
/// The table is probed by repeatedly halving the search window, returning
/// early as soon as a probed range contains `lookahead`.
/// NOTE(review): this presumably relies on `ranges` being sorted ascending and
/// non-overlapping — confirm against the code that generates the tables.
///
/// Returns false for an empty table (`len == 0`).
static inline bool set_contains(const TSCharacterRange *ranges, uint32_t len, int32_t lookahead) {
  // An empty set contains nothing. Without this guard the final probe below
  // would read ranges[0] from a zero-length table.
  if (len == 0) return false;
  uint32_t index = 0;
  uint32_t size = len - index;
  while (size > 1) {
    uint32_t half_size = size / 2;
    uint32_t mid_index = index + half_size;
    const TSCharacterRange *range = &ranges[mid_index];
    if (lookahead >= range->start && lookahead <= range->end) {
      return true;
    } else if (lookahead > range->end) {
      // Target lies above the probed range: raise the lower bound.
      index = mid_index;
    }
    size -= half_size;
  }
  // One candidate range is left; check it directly.
  const TSCharacterRange *range = &ranges[index];
  return (lookahead >= range->start && lookahead <= range->end);
}
/*
* Lexer Macros
*/
#ifdef _MSC_VER
#define UNUSED __pragma(warning(suppress : 4101))
#else
#define UNUSED __attribute__((unused))
#endif
// Prologue of a lex function body. Expects a `TSLexer *lexer` to be in scope.
//
// Declares the locals `result`, `skip`, `eof` (marked UNUSED) and `lookahead`,
// and defines two labels:
//   - `next_state`: calls `lexer->advance(lexer, skip)` and falls through to
//     `start`; this is the jump target of ADVANCE, ADVANCE_MAP and SKIP.
//   - `start`: clears `skip` and reloads `lookahead` from `lexer->lookahead`.
// The initial `goto start` skips the advance on first entry.
#define START_LEXER() \
bool result = false; \
bool skip = false; \
UNUSED \
bool eof = false; \
int32_t lookahead; \
goto start; \
next_state: \
lexer->advance(lexer, skip); \
start: \
skip = false; \
lookahead = lexer->lookahead;
#define ADVANCE(state_value) \
{ \
state = state_value; \
goto next_state; \
}
// Table-driven form of ADVANCE. The arguments are a flat list of
// (character, next state) pairs stored as `uint16_t`. The pairs are scanned in
// order; on the first pair whose character equals `lookahead`, `state` is set
// to the paired value and control jumps to `next_state`. If no pair matches,
// execution continues after the macro.
#define ADVANCE_MAP(...) \
{ \
static const uint16_t map[] = { __VA_ARGS__ }; \
for (uint32_t i = 0; i < sizeof(map) / sizeof(map[0]); i += 2) { \
if (map[i] == lookahead) { \
state = map[i + 1]; \
goto next_state; \
} \
} \
}
#define SKIP(state_value) \
{ \
skip = true; \
state = state_value; \
goto next_state; \
}
#define ACCEPT_TOKEN(symbol_value) \
result = true; \
lexer->result_symbol = symbol_value; \
lexer->mark_end(lexer);
#define END_STATE() return result;
/*
* Parse Table Macros
*/
#define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT)
#define STATE(id) id
#define ACTIONS(id) id
#define SHIFT(state_value) \
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.state = (state_value) \
} \
}}
#define SHIFT_REPEAT(state_value) \
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.state = (state_value), \
.repetition = true \
} \
}}
#define SHIFT_EXTRA() \
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.extra = true \
} \
}}
#define REDUCE(symbol_name, children, precedence, prod_id) \
{{ \
.reduce = { \
.type = TSParseActionTypeReduce, \
.symbol = symbol_name, \
.child_count = children, \
.dynamic_precedence = precedence, \
.production_id = prod_id \
}, \
}}
#define RECOVER() \
{{ \
.type = TSParseActionTypeRecover \
}}
#define ACCEPT_INPUT() \
{{ \
.type = TSParseActionTypeAccept \
}}
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_PARSER_H_

View File

@ -0,0 +1,48 @@
#include "alloc.h"
#include "tree_sitter/api.h"
#include <stdlib.h>
// Default `malloc` wrapper: hands back the block from `malloc`, and aborts
// the process with a message on stderr when a non-zero request yields NULL.
// A NULL result for a zero-byte request is passed through unchanged.
static void *ts_malloc_default(size_t size) {
  void *block = malloc(size);
  if (block || size == 0) return block;
  fprintf(stderr, "tree-sitter failed to allocate %zu bytes", size);
  abort();
}
// Default `calloc` wrapper: returns zero-initialized storage for `count`
// elements of `size` bytes each, and aborts the process with a message on
// stderr when a non-empty request cannot be satisfied.
static void *ts_calloc_default(size_t count, size_t size) {
  void *result = calloc(count, size);
  // A request for zero bytes (count == 0 OR size == 0) may legitimately come
  // back as NULL, so only a NULL for a non-empty request is a failure.
  if (count > 0 && size > 0 && !result) {
    fprintf(stderr, "tree-sitter failed to allocate %zu bytes", count * size);
    abort();
  }
  return result;
}
// Default `realloc` wrapper: resizes `buffer` to `size` bytes via `realloc`,
// and aborts the process with a message on stderr when a non-zero request
// yields NULL. A NULL result for a zero-byte request is passed through.
static void *ts_realloc_default(void *buffer, size_t size) {
  void *resized = realloc(buffer, size);
  if (resized || size == 0) return resized;
  fprintf(stderr, "tree-sitter failed to reallocate %zu bytes", size);
  abort();
}
// Allow clients to override allocation functions dynamically
//
// These are the function pointers that the `ts_malloc` / `ts_calloc` /
// `ts_realloc` / `ts_free` macros resolve to by default. They start out
// pointing at the aborting default wrappers defined in this file (and at
// plain `free`), and are reassigned by `ts_set_allocator`.
TS_PUBLIC void *(*ts_current_malloc)(size_t) = ts_malloc_default;
TS_PUBLIC void *(*ts_current_calloc)(size_t, size_t) = ts_calloc_default;
TS_PUBLIC void *(*ts_current_realloc)(void *, size_t) = ts_realloc_default;
TS_PUBLIC void (*ts_current_free)(void *) = free;
// Install a custom set of allocation functions. Each argument replaces the
// corresponding `ts_current_*` pointer; passing NULL for an argument restores
// the built-in default for that slot (`free` in the case of `new_free`).
void ts_set_allocator(
  void *(*new_malloc)(size_t size),
  void *(*new_calloc)(size_t count, size_t size),
  void *(*new_realloc)(void *ptr, size_t size),
  void (*new_free)(void *ptr)
) {
  if (new_malloc) {
    ts_current_malloc = new_malloc;
  } else {
    ts_current_malloc = ts_malloc_default;
  }

  if (new_calloc) {
    ts_current_calloc = new_calloc;
  } else {
    ts_current_calloc = ts_calloc_default;
  }

  if (new_realloc) {
    ts_current_realloc = new_realloc;
  } else {
    ts_current_realloc = ts_realloc_default;
  }

  if (new_free) {
    ts_current_free = new_free;
  } else {
    ts_current_free = free;
  }
}

View File

@ -0,0 +1,41 @@
#ifndef TREE_SITTER_ALLOC_H_
#define TREE_SITTER_ALLOC_H_
#ifdef __cplusplus
extern "C" {
#endif
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#if defined(TREE_SITTER_HIDDEN_SYMBOLS) || defined(_WIN32)
#define TS_PUBLIC
#else
#define TS_PUBLIC __attribute__((visibility("default")))
#endif
TS_PUBLIC extern void *(*ts_current_malloc)(size_t size);
TS_PUBLIC extern void *(*ts_current_calloc)(size_t count, size_t size);
TS_PUBLIC extern void *(*ts_current_realloc)(void *ptr, size_t size);
TS_PUBLIC extern void (*ts_current_free)(void *ptr);
// Allow clients to override allocation functions
#ifndef ts_malloc
#define ts_malloc ts_current_malloc
#endif
#ifndef ts_calloc
#define ts_calloc ts_current_calloc
#endif
#ifndef ts_realloc
#define ts_realloc ts_current_realloc
#endif
#ifndef ts_free
#define ts_free ts_current_free
#endif
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_ALLOC_H_

View File

@ -0,0 +1,291 @@
#ifndef TREE_SITTER_ARRAY_H_
#define TREE_SITTER_ARRAY_H_
#ifdef __cplusplus
extern "C" {
#endif
#include "./alloc.h"
#include "./ts_assert.h"
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable : 4101)
#elif defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-variable"
#endif
#define Array(T) \
struct { \
T *contents; \
uint32_t size; \
uint32_t capacity; \
}
/// Initialize an array.
#define array_init(self) \
((self)->size = 0, (self)->capacity = 0, (self)->contents = NULL)
/// Create an empty array.
#define array_new() \
{ NULL, 0, 0 }
/// Get a pointer to the element at a given `index` in the array.
#define array_get(self, _index) \
(ts_assert((uint32_t)(_index) < (self)->size), &(self)->contents[_index])
/// Get a pointer to the first element in the array.
#define array_front(self) array_get(self, 0)
/// Get a pointer to the last element in the array.
#define array_back(self) array_get(self, (self)->size - 1)
/// Clear the array, setting its size to zero. Note that this does not free any
/// memory allocated for the array's contents.
#define array_clear(self) ((self)->size = 0)
/// Reserve `new_capacity` elements of space in the array. If `new_capacity` is
/// less than the array's current capacity, this function has no effect.
#define array_reserve(self, new_capacity) \
_array__reserve((Array *)(self), array_elem_size(self), new_capacity)
/// Free the storage buffer owned by this array and reset it to the empty
/// state. Note that this does not free any memory that the individual
/// elements themselves may point to.
#define array_delete(self) _array__delete((Array *)(self))
/// Push a new `element` onto the end of the array.
#define array_push(self, element) \
(_array__grow((Array *)(self), 1, array_elem_size(self)), \
(self)->contents[(self)->size++] = (element))
/// Increase the array's size by `count` elements.
/// New elements are zero-initialized.
#define array_grow_by(self, count) \
do { \
if ((count) == 0) break; \
_array__grow((Array *)(self), count, array_elem_size(self)); \
memset((self)->contents + (self)->size, 0, (count) * array_elem_size(self)); \
(self)->size += (count); \
} while (0)
/// Append all elements from one array to the end of another.
#define array_push_all(self, other) \
array_extend((self), (other)->size, (other)->contents)
/// Append `count` elements to the end of the array, reading their values from the
/// `contents` pointer.
#define array_extend(self, count, contents) \
_array__splice( \
(Array *)(self), array_elem_size(self), (self)->size, \
0, count, contents \
)
/// Remove `old_count` elements from the array starting at the given `index`. At
/// the same index, insert `new_count` new elements, reading their values from the
/// `new_contents` pointer.
#define array_splice(self, _index, old_count, new_count, new_contents) \
_array__splice( \
(Array *)(self), array_elem_size(self), _index, \
old_count, new_count, new_contents \
)
/// Insert one `element` into the array at the given `index`.
#define array_insert(self, _index, element) \
_array__splice((Array *)(self), array_elem_size(self), _index, 0, 1, &(element))
/// Remove one element from the array at the given `index`.
#define array_erase(self, _index) \
_array__erase((Array *)(self), array_elem_size(self), _index)
/// Pop the last element off the array, returning the element by value.
#define array_pop(self) ((self)->contents[--(self)->size])
/// Assign the contents of one array to another, reallocating if necessary.
#define array_assign(self, other) \
_array__assign((Array *)(self), (const Array *)(other), array_elem_size(self))
/// Swap one array with another
#define array_swap(self, other) \
_array__swap((Array *)(self), (Array *)(other))
/// Get the size in bytes of a single element of the array.
#define array_elem_size(self) (sizeof *(self)->contents)
/// Search a sorted array for a given `needle` value, using the given `compare`
/// callback to determine the order.
///
/// If an existing element is found to be equal to `needle`, then the `index`
/// out-parameter is set to the existing value's index, and the `exists`
/// out-parameter is set to true. Otherwise, `index` is set to an index where
/// `needle` should be inserted in order to preserve the sorting, and `exists`
/// is set to false.
///
/// `compare` is invoked as `compare(&element, needle)`; `_index` and `_exists`
/// are pointers to the out-parameters.
#define array_search_sorted_with(self, compare, needle, _index, _exists) \
_array__search_sorted(self, 0, compare, , needle, _index, _exists)
/// Search a sorted array for a given `needle` value, using integer comparisons
/// of a given struct field (specified with a leading dot) to determine the order.
///
/// See also `array_search_sorted_with`.
#define array_search_sorted_by(self, field, needle, _index, _exists) \
_array__search_sorted(self, 0, _compare_int, field, needle, _index, _exists)
/// Insert a given `value` into a sorted array, using the given `compare`
/// callback to determine the order.
///
/// Nothing is inserted when an element comparing equal to `value` already
/// exists. `value` must be an lvalue, because its address is taken.
#define array_insert_sorted_with(self, compare, value) \
do { \
unsigned _index, _exists; \
array_search_sorted_with(self, compare, &(value), &_index, &_exists); \
if (!_exists) array_insert(self, _index, value); \
} while (0)
/// Insert a given `value` into a sorted array, using integer comparisons of
/// a given struct field (specified with a leading dot) to determine the order.
///
/// Nothing is inserted when an element with an equal field value already
/// exists. `value` must be an lvalue, because its address is taken.
///
/// See also `array_search_sorted_by`.
#define array_insert_sorted_by(self, field, value) \
do { \
unsigned _index, _exists; \
array_search_sorted_by(self, field, (value) field, &_index, &_exists); \
if (!_exists) array_insert(self, _index, value); \
} while (0)
// Private
/// Type-erased array header. The public macros cast typed arrays to this type
/// and pass the element size separately to the helper functions below, which
/// access its `contents`, `size` and `capacity` members.
typedef Array(void) Array;
/// This is not what you're looking for, see `array_delete`.
///
/// Frees the backing storage, if any, and resets the array to the empty,
/// unallocated state. An array without storage is left untouched.
static inline void _array__delete(Array *self) {
  if (!self->contents) return;
  ts_free(self->contents);
  self->capacity = 0;
  self->size = 0;
  self->contents = NULL;
}
/// This is not what you're looking for, see `array_erase`.
///
/// Removes the element at `index` by sliding every later element down by one
/// slot of `element_size` bytes, then shrinks the size by one.
static inline void _array__erase(Array *self, size_t element_size,
                                 uint32_t index) {
  ts_assert(index < self->size);
  char *base = (char *)self->contents;
  char *hole = base + index * element_size;
  // Number of elements that follow the erased slot.
  uint32_t tail_count = self->size - index - 1;
  memmove(hole, base + (index + 1) * element_size, tail_count * element_size);
  self->size--;
}
/// This is not what you're looking for, see `array_reserve`.
///
/// Ensures that the array can hold at least `new_capacity` elements of
/// `element_size` bytes. The capacity never shrinks: a request that does not
/// exceed the current capacity is a no-op.
static inline void _array__reserve(Array *self, size_t element_size, uint32_t new_capacity) {
  if (new_capacity <= self->capacity) return;
  size_t byte_count = new_capacity * element_size;
  // Grow an existing buffer in place where possible, otherwise allocate fresh.
  self->contents = self->contents
    ? ts_realloc(self->contents, byte_count)
    : ts_malloc(byte_count);
  self->capacity = new_capacity;
}
/// This is not what you're looking for, see `array_assign`.
///
/// Makes `self` an element-wise copy of `other`: reserves room for
/// `other->size` elements, adopts that size, and copies the element bytes.
static inline void _array__assign(Array *self, const Array *other, size_t element_size) {
  _array__reserve(self, element_size, other->size);
  self->size = other->size;
  // When both arrays are empty and unallocated, both `contents` pointers are
  // NULL. Passing NULL to memcpy is undefined behavior even for a zero length,
  // so skip the copy when there is nothing to copy.
  if (self->size > 0) {
    memcpy(self->contents, other->contents, self->size * element_size);
  }
}
/// This is not what you're looking for, see `array_swap`.
///
/// Exchanges the two array headers by value; element storage is not copied.
static inline void _array__swap(Array *self, Array *other) {
  Array held = *self;
  *self = *other;
  *other = held;
}
/// This is not what you're looking for, see `array_push` or `array_grow_by`.
///
/// Makes room for `count` more elements beyond the current size. When the
/// current capacity is insufficient, the capacity is doubled, with a floor of
/// 8 elements and never less than the required size.
static inline void _array__grow(Array *self, uint32_t count, size_t element_size) {
  uint32_t required = self->size + count;
  if (required <= self->capacity) return;

  uint32_t target = self->capacity * 2;
  if (target < 8) target = 8;
  if (target < required) target = required;
  _array__reserve(self, element_size, target);
}
/// This is not what you're looking for, see `array_splice`.
///
/// Replaces the `old_count` elements starting at `index` with `new_count`
/// elements. The new elements are copied from `elements`, or zero-filled when
/// `elements` is NULL. Elements after the replaced range are shifted so that
/// they directly follow the inserted ones.
static inline void _array__splice(Array *self, size_t element_size,
                                  uint32_t index, uint32_t old_count,
                                  uint32_t new_count, const void *elements) {
  uint32_t removed_end = index + old_count;
  uint32_t inserted_end = index + new_count;
  uint32_t resulting_size = self->size + new_count - old_count;
  ts_assert(removed_end <= self->size);

  _array__reserve(self, element_size, resulting_size);

  char *bytes = (char *)self->contents;

  // Move the tail that follows the removed range to its final position.
  if (self->size > removed_end) {
    uint32_t tail_count = self->size - removed_end;
    memmove(
      bytes + inserted_end * element_size,
      bytes + removed_end * element_size,
      tail_count * element_size
    );
  }

  // Fill the gap with the provided elements, or with zeros.
  if (new_count > 0) {
    char *gap = bytes + index * element_size;
    size_t gap_bytes = new_count * element_size;
    if (elements) {
      memcpy(gap, elements, gap_bytes);
    } else {
      memset(gap, 0, gap_bytes);
    }
  }

  self->size = resulting_size;
}
/// A binary search routine, based on Rust's `std::slice::binary_search_by`.
/// This is not what you're looking for, see `array_search_sorted_with` or `array_search_sorted_by`.
///
/// Searches the elements of `self` from index `start` onward. `compare` is
/// invoked as `compare(&element suffix, needle)`, where `suffix` is either
/// empty or a `.field` accessor, and only the sign of its result is used.
/// `_index` and `_exists` are pointers to the out-parameters: `*_exists` is
/// set to true when an equal element is found at `*_index`; otherwise
/// `*_index` is the position at which `needle` would have to be inserted.
///
/// The macro arguments are evaluated multiple times, so they must be free of
/// side effects. The expansion declares locals named `size`, `comparison`,
/// `half_size` and `mid_index`.
#define _array__search_sorted(self, start, compare, suffix, needle, _index, _exists) \
do { \
*(_index) = start; \
*(_exists) = false; \
uint32_t size = (self)->size - *(_index); \
if (size == 0) break; \
int comparison; \
while (size > 1) { \
uint32_t half_size = size / 2; \
uint32_t mid_index = *(_index) + half_size; \
comparison = compare(&((self)->contents[mid_index] suffix), (needle)); \
if (comparison <= 0) *(_index) = mid_index; \
size -= half_size; \
} \
comparison = compare(&((self)->contents[*(_index)] suffix), (needle)); \
if (comparison == 0) *(_exists) = true; \
else if (comparison < 0) *(_index) += 1; \
} while (0)
/// Helper macro for the `_sorted_by` routines below. This takes the left (existing)
/// parameter by reference in order to work with the generic sorting function above.
///
/// Evaluates to a negative, zero, or positive `int` when `*a` is less than,
/// equal to, or greater than `b`. Both operands are widened to `int64_t` and
/// compared rather than subtracted: subtracting after a cast to `int` yields
/// the wrong sign, or overflows, for values that do not fit in an `int`
/// (for example 32-bit unsigned values above `INT_MAX`).
#define _compare_int(a, b) \
  (((int64_t)*(a) > (int64_t)(b)) - ((int64_t)*(a) < (int64_t)(b)))
#ifdef _MSC_VER
#pragma warning(pop)
#elif defined(__GNUC__) || defined(__clang__)
#pragma GCC diagnostic pop
#endif
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_ARRAY_H_

View File

@ -0,0 +1,68 @@
#ifndef TREE_SITTER_ATOMIC_H_
#define TREE_SITTER_ATOMIC_H_
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#ifdef __TINYC__
// TinyCC fallbacks: these use plain, non-atomic reads and writes.

// Read the value stored at `p`.
static inline size_t atomic_load(const volatile size_t *p) {
  size_t observed = *p;
  return observed;
}

// Add one to the counter at `p` and return the updated value.
static inline uint32_t atomic_inc(volatile uint32_t *p) {
  *p = *p + 1;
  return *p;
}

// Subtract one from the counter at `p` and return the updated value.
static inline uint32_t atomic_dec(volatile uint32_t *p) {
  *p = *p - 1;
  return *p;
}
#elif defined(_WIN32)
#include <windows.h>
// Windows variants, built on the Interlocked* functions from <windows.h>.

// Read the value stored at `p` with a plain volatile load.
static inline size_t atomic_load(const volatile size_t *p) {
  size_t observed = *p;
  return observed;
}

// Increment the counter at `p` and return the value produced by
// InterlockedIncrement.
static inline uint32_t atomic_inc(volatile uint32_t *p) {
  long volatile *counter = (long volatile *)p;
  return InterlockedIncrement(counter);
}

// Decrement the counter at `p` and return the value produced by
// InterlockedDecrement.
static inline uint32_t atomic_dec(volatile uint32_t *p) {
  long volatile *counter = (long volatile *)p;
  return InterlockedDecrement(counter);
}
#else
// GCC/Clang variants. The `__atomic_*` builtins are used when the compiler
// defines `__ATOMIC_RELAXED`; otherwise the legacy `__sync_*` builtins are used.

// Read the value stored at `p`.
static inline size_t atomic_load(const volatile size_t *p) {
#ifdef __ATOMIC_RELAXED
  size_t observed = __atomic_load_n(p, __ATOMIC_RELAXED);
#else
  // Adding zero returns the current value without modifying it.
  size_t observed = __sync_fetch_and_add((volatile size_t *)p, 0);
#endif
  return observed;
}

// Atomically add one to the counter at `p` and return the updated value.
static inline uint32_t atomic_inc(volatile uint32_t *p) {
#ifdef __ATOMIC_RELAXED
  uint32_t updated = __atomic_add_fetch(p, 1U, __ATOMIC_SEQ_CST);
#else
  uint32_t updated = __sync_add_and_fetch(p, 1U);
#endif
  return updated;
}

// Atomically subtract one from the counter at `p` and return the updated value.
static inline uint32_t atomic_dec(volatile uint32_t *p) {
#ifdef __ATOMIC_RELAXED
  uint32_t updated = __atomic_sub_fetch(p, 1U, __ATOMIC_SEQ_CST);
#else
  uint32_t updated = __sync_sub_and_fetch(p, 1U);
#endif
  return updated;
}
#endif
#endif // TREE_SITTER_ATOMIC_H_

View File

@ -0,0 +1,146 @@
#ifndef TREE_SITTER_CLOCK_H_
#define TREE_SITTER_CLOCK_H_
#include <stdbool.h>
#include <stdint.h>
typedef uint64_t TSDuration;
#ifdef _WIN32
// Windows:
// * Represent a time as a performance counter value.
// * Represent a duration as a number of performance counter ticks.
#include <windows.h>
// A point in time, stored as a raw performance counter reading.
typedef uint64_t TSClock;

// Convert a microsecond count into performance counter ticks.
static inline TSDuration duration_from_micros(uint64_t micros) {
  LARGE_INTEGER ticks_per_second;
  QueryPerformanceFrequency(&ticks_per_second);
  return micros * (uint64_t)ticks_per_second.QuadPart / 1000000;
}

// Convert a number of performance counter ticks into microseconds.
static inline uint64_t duration_to_micros(TSDuration self) {
  LARGE_INTEGER ticks_per_second;
  QueryPerformanceFrequency(&ticks_per_second);
  return self * 1000000 / (uint64_t)ticks_per_second.QuadPart;
}

// The "no time" sentinel value.
static inline TSClock clock_null(void) {
  TSClock none = 0;
  return none;
}

// Read the current performance counter value.
static inline TSClock clock_now(void) {
  LARGE_INTEGER counter;
  QueryPerformanceCounter(&counter);
  return (uint64_t)counter.QuadPart;
}

// The time that lies `duration` ticks after `base`.
static inline TSClock clock_after(TSClock base, TSDuration duration) {
  TSClock deadline = base + duration;
  return deadline;
}

// Whether `self` is the sentinel produced by `clock_null`.
static inline bool clock_is_null(TSClock self) {
  return self == 0;
}

// Whether `self` is strictly later than `other`.
static inline bool clock_is_gt(TSClock self, TSClock other) {
  return other < self;
}
#elif defined(CLOCK_MONOTONIC)
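// POSIX with monotonic clock support (Linux, macOS)
// * Represent a time as a monotonic (seconds, nanoseconds) pair.
// * Represent a duration as a number of microseconds.
//
// On these platforms, parse timeouts will correspond accurately to
// real time, regardless of what other processes are running.
//
// NOTE(review): `CLOCK_MONOTONIC` is tested above before this header includes
// <time.h>, so this branch is only selected when an earlier include has
// already defined it.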
#include <time.h>
// A point in time, stored as a (seconds, nanoseconds) pair.
typedef struct timespec TSClock;

// Durations are already expressed in microseconds on this platform.
static inline TSDuration duration_from_micros(uint64_t micros) {
  TSDuration duration = micros;
  return duration;
}

// Durations are already expressed in microseconds on this platform.
static inline uint64_t duration_to_micros(TSDuration self) {
  uint64_t micros = self;
  return micros;
}

// Read the current value of the monotonic clock.
static inline TSClock clock_now(void) {
  TSClock now;
  clock_gettime(CLOCK_MONOTONIC, &now);
  return now;
}

// The "no time" sentinel: zero seconds and zero nanoseconds.
static inline TSClock clock_null(void) {
  TSClock none = {0, 0};
  return none;
}

// The time that lies `duration` microseconds after `base`.
static inline TSClock clock_after(TSClock base, TSDuration duration) {
  TSClock deadline = base;
  deadline.tv_sec += duration / 1000000;
  deadline.tv_nsec += (duration % 1000000) * 1000;
  // Carry a nanosecond overflow into the seconds field.
  if (deadline.tv_nsec >= 1000000000) {
    deadline.tv_nsec -= 1000000000;
    ++(deadline.tv_sec);
  }
  return deadline;
}

// Whether `self` is the sentinel produced by `clock_null`.
static inline bool clock_is_null(TSClock self) {
  return self.tv_sec == 0 && self.tv_nsec == 0;
}

// Whether `self` is strictly later than `other`: seconds are compared first,
// nanoseconds break ties.
static inline bool clock_is_gt(TSClock self, TSClock other) {
  if (self.tv_sec != other.tv_sec) return self.tv_sec > other.tv_sec;
  return self.tv_nsec > other.tv_nsec;
}
#else
// POSIX without monotonic clock support
// * Represent a time as a process clock value.
// * Represent a duration as a number of process clock ticks.
//
// On these platforms, parse timeouts may be affected by other processes,
// which is not ideal, but is better than using a non-monotonic time API
// like `gettimeofday`.
#include <time.h>
// A point in time, stored as a process clock reading from `clock()`.
typedef uint64_t TSClock;

// Convert a microsecond count into process clock ticks.
static inline TSDuration duration_from_micros(uint64_t micros) {
  uint64_t ticks_per_second = (uint64_t)CLOCKS_PER_SEC;
  return micros * ticks_per_second / 1000000;
}

// Convert a number of process clock ticks into microseconds.
static inline uint64_t duration_to_micros(TSDuration self) {
  uint64_t ticks_per_second = (uint64_t)CLOCKS_PER_SEC;
  return self * 1000000 / ticks_per_second;
}

// The "no time" sentinel value.
static inline TSClock clock_null(void) {
  TSClock none = 0;
  return none;
}

// Read the current process clock value.
static inline TSClock clock_now(void) {
  TSClock now = (uint64_t)clock();
  return now;
}

// The time that lies `duration` ticks after `base`.
static inline TSClock clock_after(TSClock base, TSDuration duration) {
  TSClock deadline = base + duration;
  return deadline;
}

// Whether `self` is the sentinel produced by `clock_null`.
static inline bool clock_is_null(TSClock self) {
  return self == 0;
}

// Whether `self` is strictly later than `other`.
static inline bool clock_is_gt(TSClock self, TSClock other) {
  return other < self;
}
#endif
#endif // TREE_SITTER_CLOCK_H_

View File

@ -0,0 +1,11 @@
#ifndef TREE_SITTER_ERROR_COSTS_H_
#define TREE_SITTER_ERROR_COSTS_H_
// Parse state value that is compared against subtree parse states to detect
// the error state.
#define ERROR_STATE 0
// Error cost weights.
// NOTE(review): going by their names, these are the penalties charged per
// recovery, per missing tree, per skipped tree, per skipped line and per
// skipped character - confirm against the parser code that uses them.
#define ERROR_COST_PER_RECOVERY 500
#define ERROR_COST_PER_MISSING_TREE 110
#define ERROR_COST_PER_SKIPPED_TREE 100
#define ERROR_COST_PER_SKIPPED_LINE 30
#define ERROR_COST_PER_SKIPPED_CHAR 1
#endif

View File

@ -0,0 +1,523 @@
#include "./get_changed_ranges.h"
#include "./subtree.h"
#include "./language.h"
#include "./error_costs.h"
#include "./tree_cursor.h"
#include "./ts_assert.h"
// #define DEBUG_GET_CHANGED_RANGES
// Append the range [start, end) to the array. If `start` does not lie past the
// end of the last stored range, that range's end is moved to `end` instead of
// adding a new entry. Ranges with no bytes are never appended.
static void ts_range_array_add(
  TSRangeArray *self,
  Length start,
  Length end
) {
  TSRange *previous = self->size > 0 ? array_back(self) : NULL;
  if (previous && start.bytes <= previous->end_byte) {
    previous->end_byte = end.bytes;
    previous->end_point = end.extent;
    return;
  }

  if (start.bytes >= end.bytes) return;

  TSRange added = { start.extent, end.extent, start.bytes, end.bytes };
  array_push(self, added);
}
// Report whether any range at or after `start_index` overlaps the byte span
// [start_byte, end_byte). Ranges ending at or before `start_byte` are skipped;
// the first range that ends after `start_byte` decides the result.
bool ts_range_array_intersects(
  const TSRangeArray *self,
  unsigned start_index,
  uint32_t start_byte,
  uint32_t end_byte
) {
  unsigned i = start_index;
  while (i < self->size) {
    TSRange *candidate = array_get(self, i);
    i++;
    if (candidate->end_byte <= start_byte) continue;
    return candidate->start_byte < end_byte;
  }
  return false;
}
// Compute the regions covered by exactly one of two lists of ranges and append
// them to `differences`.
//
// Both lists are walked in parallel by byte position. At each step the nearest
// upcoming boundary (the start or end of a range in either list) is chosen.
// If the span since the previous boundary was inside one list but not the
// other, that span is recorded. A list that has been fully consumed
// contributes LENGTH_MAX as its next boundary.
void ts_range_array_get_changed_ranges(
const TSRange *old_ranges, unsigned old_range_count,
const TSRange *new_ranges, unsigned new_range_count,
TSRangeArray *differences
) {
unsigned new_index = 0;
unsigned old_index = 0;
Length current_position = length_zero();
bool in_old_range = false;
bool in_new_range = false;
while (old_index < old_range_count || new_index < new_range_count) {
// These pointers may refer one past the end of a list; they are only
// dereferenced when the corresponding index is in range or the walk is
// currently inside that range.
const TSRange *old_range = &old_ranges[old_index];
const TSRange *new_range = &new_ranges[new_index];
// Next boundary in the old list: the end of the current range when inside
// one, otherwise the start of the next range, otherwise "never".
Length next_old_position;
if (in_old_range) {
next_old_position = (Length) {old_range->end_byte, old_range->end_point};
} else if (old_index < old_range_count) {
next_old_position = (Length) {old_range->start_byte, old_range->start_point};
} else {
next_old_position = LENGTH_MAX;
}
// Next boundary in the new list, determined the same way.
Length next_new_position;
if (in_new_range) {
next_new_position = (Length) {new_range->end_byte, new_range->end_point};
} else if (new_index < new_range_count) {
next_new_position = (Length) {new_range->start_byte, new_range->start_point};
} else {
next_new_position = LENGTH_MAX;
}
if (next_old_position.bytes < next_new_position.bytes) {
// The old list's boundary comes first: cross it.
if (in_old_range != in_new_range) {
ts_range_array_add(differences, current_position, next_old_position);
}
if (in_old_range) old_index++;
current_position = next_old_position;
in_old_range = !in_old_range;
} else if (next_new_position.bytes < next_old_position.bytes) {
// The new list's boundary comes first: cross it.
if (in_old_range != in_new_range) {
ts_range_array_add(differences, current_position, next_new_position);
}
if (in_new_range) new_index++;
current_position = next_new_position;
in_new_range = !in_new_range;
} else {
// Both lists have a boundary at the same byte: cross both at once.
if (in_old_range != in_new_range) {
ts_range_array_add(differences, current_position, next_new_position);
}
if (in_old_range) old_index++;
if (in_new_range) new_index++;
in_old_range = !in_old_range;
in_new_range = !in_new_range;
current_position = next_new_position;
}
}
}
// State for walking one syntax tree while comparing it against another.
typedef struct {
// Cursor whose stack holds the path from the root to the current node.
TreeCursor cursor;
// Language used to look up aliases for nodes on the stack.
const TSLanguage *language;
// Depth counter that is incremented when a visible node is entered and
// decremented when one is left; starts at 1.
unsigned visible_depth;
// True while the iterator is positioned in the padding that precedes the
// node on top of the stack, rather than in the node itself.
bool in_padding;
// Most recent external token recorded while walking past subtrees.
Subtree prev_external_token;
} Iterator;
// Create an iterator positioned at the root `tree`. The cursor's stack is
// reset so that it contains only the root entry, and the iterator then takes
// a copy of the cursor.
static Iterator iterator_new(
  TreeCursor *cursor,
  const Subtree *tree,
  const TSLanguage *language
) {
  TreeCursorEntry root_entry = {
    .subtree = tree,
    .position = length_zero(),
    .child_index = 0,
    .structural_child_index = 0,
  };
  array_clear(&cursor->stack);
  array_push(&cursor->stack, root_entry);

  Iterator iterator = {
    .cursor = *cursor,
    .language = language,
    .visible_depth = 1,
    .in_padding = false,
    .prev_external_token = NULL_SUBTREE,
  };
  return iterator;
}
// An iterator is finished once its cursor stack has been emptied.
static bool iterator_done(Iterator *self) {
  uint32_t remaining_entries = self->cursor.stack.size;
  return remaining_entries == 0;
}
// Start of the region the iterator currently covers: the entry's own position
// while in padding, otherwise the position just after the node's padding.
static Length iterator_start_position(Iterator *self) {
  const TreeCursorEntry *top = array_back(&self->cursor.stack);
  if (self->in_padding) return top->position;
  return length_add(top->position, ts_subtree_padding(*top->subtree));
}
// End of the region the iterator currently covers: the end of the padding
// while in padding, otherwise the end of the node's own content.
static Length iterator_end_position(Iterator *self) {
  const TreeCursorEntry *top = array_back(&self->cursor.stack);
  Length padding_end = length_add(top->position, ts_subtree_padding(*top->subtree));
  if (self->in_padding) return padding_end;
  return length_add(padding_end, ts_subtree_size(*top->subtree));
}
static bool iterator_tree_is_visible(const Iterator *self) {
TreeCursorEntry entry = *array_back(&self->cursor.stack);
if (ts_subtree_visible(*entry.subtree)) return true;
if (self->cursor.stack.size > 1) {
Subtree parent = *array_get(&self->cursor.stack, self->cursor.stack.size - 2)->subtree;
return ts_language_alias_at(
self->language,
parent.ptr->production_id,
entry.structural_child_index
) != 0;
}
return false;
}
// Find the innermost visible node for the iterator's current position.
//
// The stack is searched from the top entry toward the root; while in padding
// the top entry is skipped. For the first entry that is visible or aliased,
// `*tree` receives its subtree and `*start_byte` the byte offset of the
// entry's position. `*alias_symbol` is overwritten for every non-root entry
// that is examined. The outputs are left as passed in when the iterator is in
// padding at the root, or when no entry qualifies.
static void iterator_get_visible_state(
const Iterator *self,
Subtree *tree,
TSSymbol *alias_symbol,
uint32_t *start_byte
) {
uint32_t i = self->cursor.stack.size - 1;
if (self->in_padding) {
if (i == 0) return;
i--;
}
// `i + 1 > 0` lets the unsigned index reach zero and then terminate once it
// wraps around.
for (; i + 1 > 0; i--) {
TreeCursorEntry entry = *array_get(&self->cursor.stack, i);
if (i > 0) {
const Subtree *parent = array_get(&self->cursor.stack, i - 1)->subtree;
*alias_symbol = ts_language_alias_at(
self->language,
parent->ptr->production_id,
entry.structural_child_index
);
}
if (ts_subtree_visible(*entry.subtree) || *alias_symbol) {
*tree = *entry.subtree;
*start_byte = entry.position.bytes;
break;
}
}
}
// Move the iterator up to the parent of the current node by dropping the top
// stack entry. Does nothing when the iterator is already done.
static void iterator_ascend(Iterator *self) {
if (iterator_done(self)) return;
// Leaving a visible node that had actually been entered reduces the depth.
if (iterator_tree_is_visible(self) && !self->in_padding) self->visible_depth--;
// The padding flag is cleared unless the node being left is a first child.
if (array_back(&self->cursor.stack)->child_index > 0) self->in_padding = false;
self->cursor.stack.size--;
}
// Descend from the current node toward the child that covers `goal_position`.
//
// At each level the first child whose end byte lies beyond `goal_position` is
// pushed onto the stack. If that child is visible, the descent stops and true
// is returned; the iterator is marked as being in the child's padding when
// the child's content starts after `goal_position`, otherwise `visible_depth`
// is incremented. Invisible children are descended through. Returns false
// when the iterator is in padding, or when no visible node was reached.
static bool iterator_descend(Iterator *self, uint32_t goal_position) {
if (self->in_padding) return false;
bool did_descend = false;
do {
did_descend = false;
TreeCursorEntry entry = *array_back(&self->cursor.stack);
Length position = entry.position;
uint32_t structural_child_index = 0;
for (uint32_t i = 0, n = ts_subtree_child_count(*entry.subtree); i < n; i++) {
const Subtree *child = &ts_subtree_children(*entry.subtree)[i];
Length child_left = length_add(position, ts_subtree_padding(*child));
Length child_right = length_add(child_left, ts_subtree_size(*child));
if (child_right.bytes > goal_position) {
array_push(&self->cursor.stack, ((TreeCursorEntry) {
.subtree = child,
.position = position,
.child_index = i,
.structural_child_index = structural_child_index,
}));
if (iterator_tree_is_visible(self)) {
if (child_left.bytes > goal_position) {
self->in_padding = true;
} else {
self->visible_depth++;
}
return true;
}
// The child is invisible: keep descending through it.
did_descend = true;
break;
}
// The child ends at or before the goal: step over it, counting it as a
// structural child unless it is an extra, and remember the last external
// token it contains.
position = child_right;
if (!ts_subtree_extra(*child)) structural_child_index++;
Subtree last_external_token = ts_subtree_last_external_token(*child);
if (last_external_token.ptr) {
self->prev_external_token = last_external_token;
}
}
} while (did_descend);
return false;
}
// Advance the iterator to the next region of the tree.
//
// When in padding, the iterator moves from the padding into the node itself
// (or descends into it when the node is not visible). Otherwise the current
// node is popped and the iterator moves to the parent's next child, climbing
// further up whenever the popped node was a last child. The iterator becomes
// done when the root is popped.
static void iterator_advance(Iterator *self) {
if (self->in_padding) {
self->in_padding = false;
if (iterator_tree_is_visible(self)) {
self->visible_depth++;
} else {
iterator_descend(self, 0);
}
return;
}
for (;;) {
if (iterator_tree_is_visible(self)) self->visible_depth--;
TreeCursorEntry entry = array_pop(&self->cursor.stack);
if (iterator_done(self)) return;
const Subtree *parent = array_back(&self->cursor.stack)->subtree;
uint32_t child_index = entry.child_index + 1;
// Remember the last external token of the node that was just left.
Subtree last_external_token = ts_subtree_last_external_token(*entry.subtree);
if (last_external_token.ptr) {
self->prev_external_token = last_external_token;
}
if (ts_subtree_child_count(*parent) > child_index) {
// The next sibling starts where the popped node (padding included) ends.
Length position = length_add(entry.position, ts_subtree_total_size(*entry.subtree));
uint32_t structural_child_index = entry.structural_child_index;
if (!ts_subtree_extra(*entry.subtree)) structural_child_index++;
const Subtree *next_child = &ts_subtree_children(*parent)[child_index];
array_push(&self->cursor.stack, ((TreeCursorEntry) {
.subtree = next_child,
.position = position,
.child_index = child_index,
.structural_child_index = structural_child_index,
}));
if (iterator_tree_is_visible(self)) {
// A visible sibling with padding is entered through its padding first.
if (ts_subtree_padding(*next_child).bytes > 0) {
self->in_padding = true;
} else {
self->visible_depth++;
}
} else {
iterator_descend(self, 0);
}
break;
}
}
}
// Result of comparing the current nodes of two iterators.
typedef enum {
// The nodes are different; the region is reported as changed.
IteratorDiffers,
// The nodes look alike at the top level but may differ internally; the
// caller descends into both to find out.
IteratorMayDiffer,
// The nodes are treated as identical; the caller skips past both.
IteratorMatches,
} IteratorComparison;
// Compare the visible nodes that the two iterators currently point at.
//
// Returns IteratorMatches when neither iterator has a visible node, or when
// the nodes agree in every property checked below. Returns IteratorDiffers
// when only one iterator has a visible node, or when the alias or symbol
// differs. Returns IteratorMayDiffer when the symbols agree but any other
// property (start byte, size, parse state, error cost, external tokens,
// pending changes) indicates that the contents could differ.
static IteratorComparison iterator_compare(
  const Iterator *old_iter,
  const Iterator *new_iter
) {
  Subtree old_tree = NULL_SUBTREE;
  Subtree new_tree = NULL_SUBTREE;
  uint32_t old_start = 0;
  uint32_t new_start = 0;
  TSSymbol old_alias_symbol = 0;
  TSSymbol new_alias_symbol = 0;
  iterator_get_visible_state(old_iter, &old_tree, &old_alias_symbol, &old_start);
  iterator_get_visible_state(new_iter, &new_tree, &new_alias_symbol, &new_start);

  // Either lookup may leave its subtree null. Handle those cases before
  // reading any property from the subtrees, so that a null subtree is never
  // inspected.
  if (!old_tree.ptr && !new_tree.ptr) return IteratorMatches;
  if (!old_tree.ptr || !new_tree.ptr) return IteratorDiffers;

  TSSymbol old_symbol = ts_subtree_symbol(old_tree);
  TSSymbol new_symbol = ts_subtree_symbol(new_tree);
  if (old_alias_symbol != new_alias_symbol || old_symbol != new_symbol) return IteratorDiffers;

  uint32_t old_size = ts_subtree_size(old_tree).bytes;
  uint32_t new_size = ts_subtree_size(new_tree).bytes;
  TSStateId old_state = ts_subtree_parse_state(old_tree);
  TSStateId new_state = ts_subtree_parse_state(new_tree);
  bool old_has_external_tokens = ts_subtree_has_external_tokens(old_tree);
  bool new_has_external_tokens = ts_subtree_has_external_tokens(new_tree);
  uint32_t old_error_cost = ts_subtree_error_cost(old_tree);
  uint32_t new_error_cost = ts_subtree_error_cost(new_tree);

  if (
    old_start != new_start ||
    old_symbol == ts_builtin_sym_error ||
    old_size != new_size ||
    old_state == TS_TREE_STATE_NONE ||
    new_state == TS_TREE_STATE_NONE ||
    ((old_state == ERROR_STATE) != (new_state == ERROR_STATE)) ||
    old_error_cost != new_error_cost ||
    old_has_external_tokens != new_has_external_tokens ||
    ts_subtree_has_changes(old_tree) ||
    (
      old_has_external_tokens &&
      !ts_subtree_external_scanner_state_eq(old_iter->prev_external_token, new_iter->prev_external_token)
    )
  ) {
    return IteratorMayDiffer;
  }

  return IteratorMatches;
}
#ifdef DEBUG_GET_CHANGED_RANGES
// Debugging aid, compiled only when DEBUG_GET_CHANGED_RANGES is defined.
// Prints the symbol name of the node on top of the stack, a "(p)" marker when
// the iterator is in padding, the visible depth, and the start and end points
// of the iterator's current region.
static inline void iterator_print_state(Iterator *self) {
TreeCursorEntry entry = *array_back(&self->cursor.stack);
TSPoint start = iterator_start_position(self).extent;
TSPoint end = iterator_end_position(self).extent;
const char *name = ts_language_symbol_name(self->language, ts_subtree_symbol(*entry.subtree));
printf(
"(%-25s %s\t depth:%u [%u, %u] - [%u, %u])",
name, self->in_padding ? "(p)" : " ",
self->visible_depth,
start.row, start.column,
end.row, end.column
);
}
#endif
// Compute the ranges over which two syntax trees differ.
//
// Both trees are walked in lockstep using `cursor1` (old tree) and `cursor2`
// (new tree). At each step the current nodes are compared: matching nodes are
// skipped, differing nodes are recorded as changed, and nodes that may differ
// internally are descended into. `included_range_differences` lists regions
// whose inclusion in the parse changed; nodes overlapping those regions are
// never treated as matching without being inspected.
//
// On return, `*ranges` points at the buffer of the resulting range array and
// the return value is the number of ranges in it. The buffer is not freed
// here. The cursors are updated with the iterators' final cursor state.
unsigned ts_subtree_get_changed_ranges(
const Subtree *old_tree, const Subtree *new_tree,
TreeCursor *cursor1, TreeCursor *cursor2,
const TSLanguage *language,
const TSRangeArray *included_range_differences,
TSRange **ranges
) {
TSRangeArray results = array_new();
Iterator old_iter = iterator_new(cursor1, old_tree, language);
Iterator new_iter = iterator_new(cursor2, new_tree, language);
unsigned included_range_difference_index = 0;
// If the two trees start at different offsets, the gap between the two
// start positions is itself a change.
Length position = iterator_start_position(&old_iter);
Length next_position = iterator_start_position(&new_iter);
if (position.bytes < next_position.bytes) {
ts_range_array_add(&results, position, next_position);
position = next_position;
} else if (position.bytes > next_position.bytes) {
ts_range_array_add(&results, next_position, position);
next_position = position;
}
do {
#ifdef DEBUG_GET_CHANGED_RANGES
printf("At [%-2u, %-2u] Compare ", position.extent.row, position.extent.column);
iterator_print_state(&old_iter);
printf("\tvs\t");
iterator_print_state(&new_iter);
puts("");
#endif
// Compare the old and new subtrees.
IteratorComparison comparison = iterator_compare(&old_iter, &new_iter);
// Even if the two subtrees appear to be identical, they could differ
// internally if they contain a range of text that was previously
// excluded from the parse, and is now included, or vice-versa.
if (comparison == IteratorMatches && ts_range_array_intersects(
included_range_differences,
included_range_difference_index,
position.bytes,
iterator_end_position(&old_iter).bytes
)) {
comparison = IteratorMayDiffer;
}
bool is_changed = false;
switch (comparison) {
// If the subtrees are definitely identical, move to the end
// of both subtrees.
case IteratorMatches:
next_position = iterator_end_position(&old_iter);
break;
// If the subtrees might differ internally, descend into both
// subtrees, finding the first child that spans the current position.
case IteratorMayDiffer:
if (iterator_descend(&old_iter, position.bytes)) {
if (!iterator_descend(&new_iter, position.bytes)) {
is_changed = true;
next_position = iterator_end_position(&old_iter);
}
} else if (iterator_descend(&new_iter, position.bytes)) {
is_changed = true;
next_position = iterator_end_position(&new_iter);
} else {
next_position = length_min(
iterator_end_position(&old_iter),
iterator_end_position(&new_iter)
);
}
break;
// If the subtrees are different, record a change and then move
// to the end of both subtrees.
case IteratorDiffers:
is_changed = true;
next_position = length_min(
iterator_end_position(&old_iter),
iterator_end_position(&new_iter)
);
break;
}
// Ensure that both iterators are caught up to the current position.
while (
!iterator_done(&old_iter) &&
iterator_end_position(&old_iter).bytes <= next_position.bytes
) iterator_advance(&old_iter);
while (
!iterator_done(&new_iter) &&
iterator_end_position(&new_iter).bytes <= next_position.bytes
) iterator_advance(&new_iter);
// Ensure that both iterators are at the same depth in the tree.
while (old_iter.visible_depth > new_iter.visible_depth) {
iterator_ascend(&old_iter);
}
while (new_iter.visible_depth > old_iter.visible_depth) {
iterator_ascend(&new_iter);
}
if (is_changed) {
#ifdef DEBUG_GET_CHANGED_RANGES
printf(
" change: [[%u, %u] - [%u, %u]]\n",
position.extent.row + 1, position.extent.column,
next_position.extent.row + 1, next_position.extent.column
);
#endif
ts_range_array_add(&results, position, next_position);
}
position = next_position;
// Keep track of the current position in the included range differences
// array in order to avoid scanning the entire array on each iteration.
while (included_range_difference_index < included_range_differences->size) {
const TSRange *range = array_get(included_range_differences,
included_range_difference_index
);
if (range->end_byte <= position.bytes) {
included_range_difference_index++;
} else {
break;
}
}
} while (!iterator_done(&old_iter) && !iterator_done(&new_iter));
// If one tree is longer than the other, the extra tail counts as a change.
Length old_size = ts_subtree_total_size(*old_tree);
Length new_size = ts_subtree_total_size(*new_tree);
if (old_size.bytes < new_size.bytes) {
ts_range_array_add(&results, old_size, new_size);
} else if (new_size.bytes < old_size.bytes) {
ts_range_array_add(&results, new_size, old_size);
}
// Hand the (possibly reallocated) cursor stacks back to the caller.
*cursor1 = old_iter.cursor;
*cursor2 = new_iter.cursor;
*ranges = results.contents;
return results.size;
}

View File

@ -0,0 +1,36 @@
#ifndef TREE_SITTER_GET_CHANGED_RANGES_H_
#define TREE_SITTER_GET_CHANGED_RANGES_H_
#ifdef __cplusplus
extern "C" {
#endif
#include "./tree_cursor.h"
#include "./subtree.h"
// A growable array of ranges.
typedef Array(TSRange) TSRangeArray;
// Append to `differences` the regions that are covered by exactly one of the
// two range lists.
void ts_range_array_get_changed_ranges(
const TSRange *old_ranges, unsigned old_range_count,
const TSRange *new_ranges, unsigned new_range_count,
TSRangeArray *differences
);
// Report whether any range of `self`, starting the search at `start_index`,
// overlaps the byte span from `start_byte` up to (not including) `end_byte`.
bool ts_range_array_intersects(
const TSRangeArray *self, unsigned start_index,
uint32_t start_byte, uint32_t end_byte
);
// Compute the ranges over which `old_tree` and `new_tree` differ. The result
// buffer is stored in `*ranges` and its length is returned. The two cursors
// are used as working storage for walking the old and new tree respectively.
unsigned ts_subtree_get_changed_ranges(
const Subtree *old_tree, const Subtree *new_tree,
TreeCursor *cursor1, TreeCursor *cursor2,
const TSLanguage *language,
const TSRangeArray *included_range_differences,
TSRange **ranges
);
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_GET_CHANGED_RANGES_H_

View File

@ -0,0 +1,21 @@
// Determine endian and pointer size based on known defines.
// TS_BIG_ENDIAN and TS_PTR_SIZE can be set as -D compiler arguments
// to override this.
// TS_BIG_ENDIAN is 1 when the compiler reports big-endian byte order, or when
// building with Apple's compiler for PowerPC; otherwise it is 0.
#if !defined(TS_BIG_ENDIAN)
#if (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) \
|| (defined( __APPLE_CC__) && (defined(__ppc__) || defined(__ppc64__)))
#define TS_BIG_ENDIAN 1
#else
#define TS_BIG_ENDIAN 0
#endif
#endif
// TS_PTR_SIZE is the pointer width in bits: 32 when UINTPTR_MAX equals
// 0xFFFFFFFF, otherwise 64.
// NOTE(review): UINTPTR_MAX comes from <stdint.h>, which this header does not
// include itself. If it is undefined here, the preprocessor evaluates it as 0
// and the 64-bit branch is selected - confirm that includers pull in
// <stdint.h> first.
#if !defined(TS_PTR_SIZE)
#if UINTPTR_MAX == 0xFFFFFFFF
#define TS_PTR_SIZE 32
#else
#define TS_PTR_SIZE 64
#endif
#endif

View File

@ -0,0 +1,293 @@
#include "./language.h"
#include "./wasm_store.h"
#include "tree_sitter/api.h"
#include <string.h>
// Return `self` as a new reference to the language. Wasm languages are
// retained; for every other language (and for NULL) nothing else happens.
const TSLanguage *ts_language_copy(const TSLanguage *self) {
  bool needs_retain = self && ts_language_is_wasm(self);
  if (needs_retain) ts_wasm_language_retain(self);
  return self;
}
// Drop a reference to the language. Only Wasm languages are released; NULL and
// all other languages are ignored.
void ts_language_delete(const TSLanguage *self) {
  if (!self || !ts_language_is_wasm(self)) return;
  ts_wasm_language_release(self);
}
// Total number of symbols: the language's own symbols plus its aliases.
uint32_t ts_language_symbol_count(const TSLanguage *self) {
  uint32_t total = self->symbol_count + self->alias_count;
  return total;
}
// Number of parse states in the language.
uint32_t ts_language_state_count(const TSLanguage *self) {
  uint32_t states = self->state_count;
  return states;
}
// Return the language's supertype symbols and store their count in `*length`.
// Languages whose ABI version is older than
// LANGUAGE_VERSION_WITH_RESERVED_WORDS yield NULL with a length of zero.
const TSSymbol *ts_language_supertypes(const TSLanguage *self, uint32_t *length) {
  if (self->abi_version < LANGUAGE_VERSION_WITH_RESERVED_WORDS) {
    *length = 0;
    return NULL;
  }
  *length = self->supertype_count;
  return self->supertype_symbols;
}
// Return the subtypes of `supertype` and store their count in `*length`.
// Yields NULL with a length of zero when the language's ABI version is older
// than LANGUAGE_VERSION_WITH_RESERVED_WORDS, or when the symbol's metadata
// does not mark it as a supertype.
const TSSymbol *ts_language_subtypes(
  const TSLanguage *self,
  TSSymbol supertype,
  uint32_t *length
) {
  bool has_supertype_tables = self->abi_version >= LANGUAGE_VERSION_WITH_RESERVED_WORDS;
  if (has_supertype_tables && ts_language_symbol_metadata(self, supertype).supertype) {
    TSMapSlice slice = self->supertype_map_slices[supertype];
    *length = slice.length;
    return &self->supertype_map_entries[slice.index];
  }

  *length = 0;
  return NULL;
}
// ABI version of the language; returns the same value as
// `ts_language_abi_version`.
uint32_t ts_language_version(const TSLanguage *self) {
  uint32_t version = self->abi_version;
  return version;
}
// ABI version that the language was generated with.
uint32_t ts_language_abi_version(const TSLanguage *self) {
  uint32_t version = self->abi_version;
  return version;
}
// Pointer to the language's metadata, or NULL for languages whose ABI version
// is older than LANGUAGE_VERSION_WITH_RESERVED_WORDS.
const TSLanguageMetadata *ts_language_metadata(const TSLanguage *self) {
  if (self->abi_version >= LANGUAGE_VERSION_WITH_RESERVED_WORDS) {
    return &self->metadata;
  }
  return NULL;
}
// Name of the language, or NULL for languages whose ABI version is older than
// LANGUAGE_VERSION_WITH_RESERVED_WORDS.
const char *ts_language_name(const TSLanguage *self) {
  if (self->abi_version >= LANGUAGE_VERSION_WITH_RESERVED_WORDS) {
    return self->name;
  }
  return NULL;
}
// Number of distinct field names in the language.
uint32_t ts_language_field_count(const TSLanguage *self) {
  uint32_t fields = self->field_count;
  return fields;
}
// Fill `result` with the parse actions for `symbol` in `state`.
// The built-in error symbols have no actions and are never reusable. For any
// other symbol, which must be a token, the action table entry is looked up;
// the actions themselves are stored directly after the entry's header.
void ts_language_table_entry(
  const TSLanguage *self,
  TSStateId state,
  TSSymbol symbol,
  TableEntry *result
) {
  bool is_error_symbol =
    symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat;
  if (is_error_symbol) {
    result->action_count = 0;
    result->is_reusable = false;
    result->actions = NULL;
    return;
  }

  ts_assert(symbol < self->token_count);
  uint32_t action_index = ts_language_lookup(self, state, symbol);
  const TSParseActionEntry *header = &self->parse_actions[action_index];
  result->action_count = header->entry.count;
  result->is_reusable = header->entry.reusable;
  result->actions = (const TSParseAction *)(header + 1);
}
// Return the lexer mode for a parse state. For ABI versions below 15 the
// `lex_modes` table is read as an array of `TSLexMode` and converted, with the
// reserved word set id set to zero.
TSLexerMode ts_language_lex_mode_for_state(
  const TSLanguage *self,
  TSStateId state
) {
  if (self->abi_version >= 15) return self->lex_modes[state];

  const TSLexMode *legacy_modes = (const TSLexMode *)self->lex_modes;
  TSLexMode legacy = legacy_modes[state];
  TSLexerMode converted = {
    .lex_state = legacy.lex_state,
    .external_lex_state = legacy.external_lex_state,
    .reserved_word_set_id = 0,
  };
  return converted;
}
// Report whether `symbol` belongs to the reserved word set that is active in
// `state`. A set id of zero means there is no set to search. Each set occupies
// `max_reserved_word_set_size` slots, and a zero entry ends the set early.
bool ts_language_is_reserved_word(
  const TSLanguage *self,
  TSStateId state,
  TSSymbol symbol
) {
  TSLexerMode lex_mode = ts_language_lex_mode_for_state(self, state);
  if (!(lex_mode.reserved_word_set_id > 0)) return false;

  unsigned start = lex_mode.reserved_word_set_id * self->max_reserved_word_set_size;
  unsigned end = start + self->max_reserved_word_set_size;
  unsigned i = start;
  while (i < end) {
    if (self->reserved_words[i] == symbol) return true;
    if (self->reserved_words[i] == 0) return false;
    i++;
  }
  return false;
}
// Return the metadata for a symbol. The built-in error symbol is reported as
// visible and named, the built-in error-repeat symbol as neither; every other
// symbol is read from the language's metadata table.
TSSymbolMetadata ts_language_symbol_metadata(
  const TSLanguage *self,
  TSSymbol symbol
) {
  if (symbol == ts_builtin_sym_error) {
    TSSymbolMetadata error_metadata = {.visible = true, .named = true};
    return error_metadata;
  }
  if (symbol == ts_builtin_sym_error_repeat) {
    TSSymbolMetadata repeat_metadata = {.visible = false, .named = false};
    return repeat_metadata;
  }
  return self->symbol_metadata[symbol];
}
// Map a symbol through the language's public symbol map. The built-in error
// symbol is returned unchanged.
TSSymbol ts_language_public_symbol(
  const TSLanguage *self,
  TSSymbol symbol
) {
  bool is_error = symbol == ts_builtin_sym_error;
  if (!is_error) return self->public_symbol_map[symbol];
  return symbol;
}
// Return the state reached from `state` on `symbol`.
// The built-in error symbols always lead to state 0. For a token, the last
// parse action decides: a shift leads to its target state (or stays in `state`
// for an extra), anything else leads to state 0. For any other symbol the
// result comes straight from the parse table lookup.
TSStateId ts_language_next_state(
  const TSLanguage *self,
  TSStateId state,
  TSSymbol symbol
) {
  if (symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat) {
    return 0;
  }
  if (symbol >= self->token_count) {
    return ts_language_lookup(self, state, symbol);
  }

  uint32_t count;
  const TSParseAction *actions = ts_language_actions(self, state, symbol, &count);
  if (count == 0) return 0;

  TSParseAction last_action = actions[count - 1];
  if (last_action.type != TSParseActionTypeShift) return 0;
  return last_action.shift.extra ? state : last_action.shift.state;
}
// Return the name of a symbol: "ERROR" and "_ERROR" for the built-in error
// symbols, the entry from the language's name table for symbols within the
// symbol count, and NULL for anything else.
const char *ts_language_symbol_name(
  const TSLanguage *self,
  TSSymbol symbol
) {
  if (symbol == ts_builtin_sym_error) return "ERROR";
  if (symbol == ts_builtin_sym_error_repeat) return "_ERROR";
  if (symbol >= ts_language_symbol_count(self)) return NULL;
  return self->symbol_names[symbol];
}
// Look up a symbol by name.
//
// `string` holds the name and need not be NUL-terminated; `length` is the
// number of bytes in it. `is_named` selects between named and anonymous
// symbols. Returns the built-in error symbol for the exact name "ERROR", the
// public symbol id for a matching visible or supertype symbol, and 0 when
// nothing matches.
TSSymbol ts_language_symbol_for_name(
  const TSLanguage *self,
  const char *string,
  uint32_t length,
  bool is_named
) {
  // Require the full name "ERROR". Comparing only the first `length` bytes
  // would also accept every prefix of it ("E", "ERR", ...) as well as the
  // empty string.
  static const char error_name[] = "ERROR";
  if (length == sizeof(error_name) - 1 && !strncmp(string, error_name, length)) {
    return ts_builtin_sym_error;
  }

  uint16_t count = (uint16_t)ts_language_symbol_count(self);
  for (TSSymbol i = 0; i < count; i++) {
    TSSymbolMetadata metadata = ts_language_symbol_metadata(self, i);
    // Skip symbols that are neither visible nor supertypes, and symbols of
    // the wrong kind (named vs. anonymous).
    if ((!metadata.visible && !metadata.supertype) || metadata.named != is_named) continue;
    const char *symbol_name = self->symbol_names[i];
    // The first `length` bytes must match and the symbol name must end there.
    if (!strncmp(symbol_name, string, length) && !symbol_name[length]) {
      return self->public_symbol_map[i];
    }
  }
  return 0;
}
// Classify a symbol from its metadata: visible symbols are regular when named
// and anonymous otherwise; invisible symbols are supertypes when flagged as
// such and auxiliary otherwise.
TSSymbolType ts_language_symbol_type(
  const TSLanguage *self,
  TSSymbol symbol
) {
  TSSymbolMetadata metadata = ts_language_symbol_metadata(self, symbol);
  if (metadata.visible) {
    if (metadata.named) return TSSymbolTypeRegular;
    return TSSymbolTypeAnonymous;
  }
  if (metadata.supertype) return TSSymbolTypeSupertype;
  return TSSymbolTypeAuxiliary;
}
// Return the name stored for a field id, or NULL when the language has no
// fields or the id exceeds the field count.
const char *ts_language_field_name_for_id(
  const TSLanguage *self,
  TSFieldId id
) {
  uint32_t count = ts_language_field_count(self);
  if (count == 0 || id > count) return NULL;
  return self->field_names[id];
}
// Look up a field id by name.
//
// `name` need not be NUL-terminated; `name_length` is the number of bytes in
// it. Field ids start at 1. Returns 0 when no field has that name.
TSFieldId ts_language_field_id_for_name(
  const TSLanguage *self,
  const char *name,
  uint32_t name_length
) {
  uint16_t count = (uint16_t)ts_language_field_count(self);
  for (TSSymbol i = 1; i < count + 1; i++) {
    int comparison = strncmp(name, self->field_names[i], name_length);
    if (comparison == 0) {
      // The first `name_length` bytes match; it is only a hit if the field
      // name ends there too.
      if (self->field_names[i][name_length] == 0) return i;
    } else if (comparison < 0) {
      // `name` sorts before this field name, so the search can stop early.
      // strncmp is only specified to return *some* negative value here, so
      // test the sign rather than comparing against -1.
      // NOTE(review): like the original early exit, this assumes that
      // `field_names` is sorted in ascending byte order - confirm against
      // the code that generates the table.
      return 0;
    }
  }
  return 0;
}
// Allocate a lookahead iterator positioned before the first valid symbol of
// `state`. Returns NULL when `state` is not a valid state id for the
// language. Release the result with ts_lookahead_iterator_delete().
TSLookaheadIterator *ts_lookahead_iterator_new(const TSLanguage *self, TSStateId state) {
  if (state >= self->state_count) {
    return NULL;
  }
  LookaheadIterator *result = ts_malloc(sizeof(LookaheadIterator));
  *result = ts_language_lookaheads(self, state);
  return (TSLookaheadIterator *)result;
}
// Release an iterator obtained from ts_lookahead_iterator_new().
void ts_lookahead_iterator_delete(TSLookaheadIterator *self) {
ts_free(self);
}
// Restart the iterator on a different state of the language it already
// belongs to. Returns false (leaving the iterator untouched) when `state` is
// out of range for that language.
bool ts_lookahead_iterator_reset_state(TSLookaheadIterator * self, TSStateId state) {
  LookaheadIterator *iterator = (LookaheadIterator *)self;
  const TSLanguage *language = iterator->language;
  if (state >= language->state_count) {
    return false;
  }
  *iterator = ts_language_lookaheads(language, state);
  return true;
}
// Return the language the iterator is currently walking.
const TSLanguage *ts_lookahead_iterator_language(const TSLookaheadIterator *self) {
  return ((const LookaheadIterator *)self)->language;
}
// Restart the iterator on `state` of `language`, which may differ from the
// language it was created with. Returns false (leaving the iterator
// untouched) when `state` is out of range for `language`.
bool ts_lookahead_iterator_reset(TSLookaheadIterator *self, const TSLanguage *language, TSStateId state) {
  if (state < language->state_count) {
    LookaheadIterator *iterator = (LookaheadIterator *)self;
    *iterator = ts_language_lookaheads(language, state);
    return true;
  }
  return false;
}
// Public wrapper: step to the next valid symbol. Returns false once the
// state's symbols are exhausted.
bool ts_lookahead_iterator_next(TSLookaheadIterator *self) {
  return ts_lookahead_iterator__next((LookaheadIterator *)self);
}
// Return the symbol the iterator is currently positioned on.
TSSymbol ts_lookahead_iterator_current_symbol(const TSLookaheadIterator *self) {
  return ((const LookaheadIterator *)self)->symbol;
}
const char *ts_lookahead_iterator_current_symbol_name(const TSLookaheadIterator *self) {
const LookaheadIterator *iterator = (const LookaheadIterator *)self;
return ts_language_symbol_name(iterator->language, iterator->symbol);
}

View File

@ -0,0 +1,293 @@
#ifndef TREE_SITTER_LANGUAGE_H_
#define TREE_SITTER_LANGUAGE_H_
#ifdef __cplusplus
extern "C" {
#endif
#include "./subtree.h"
#include "./parser.h"
// Second built-in error symbol; its value sits immediately below
// ts_builtin_sym_error.
#define ts_builtin_sym_error_repeat (ts_builtin_sym_error - 1)
// NOTE(review): presumably the first ABI version whose languages carry
// reserved-word data; it is not referenced in this header, so confirm
// against its users.
#define LANGUAGE_VERSION_WITH_RESERVED_WORDS 15
// Minimum `abi_version` at which ts_language_state_is_primary() consults
// `primary_state_ids`; older languages treat every state as primary.
#define LANGUAGE_VERSION_WITH_PRIMARY_STATES 14
// Result of a parse-table lookup for one (state, symbol) pair, filled in by
// ts_language_table_entry().
typedef struct {
// Actions for the pair; `action_count` says how many are present.
const TSParseAction *actions;
uint32_t action_count;
// NOTE(review): set by ts_language_table_entry(), which is defined outside
// this header; meaning not verifiable from here.
bool is_reusable;
} TableEntry;
// Cursor over the symbols that have a non-zero parse-table entry in one
// state. Created by ts_language_lookaheads() and stepped with
// ts_lookahead_iterator__next().
typedef struct {
// Language whose tables are being walked.
const TSLanguage *language;
// Read position inside `parse_table` (large states) or
// `small_parse_table` (small states).
const uint16_t *data;
// Small states only: one past the last symbol of the current group.
const uint16_t *group_end;
// Not assigned by ts_language_lookaheads(); left zero-initialized there.
TSStateId state;
// Raw table value for the current symbol (or current group).
uint16_t table_value;
// Not touched by the helpers in this header.
uint16_t section_index;
// Small states only: number of groups not yet entered.
uint16_t group_count;
// True when the state lives in the compressed small parse table.
bool is_small_state;
// For terminal symbols: the action list and its length.
const TSParseAction *actions;
// Current symbol; starts at UINT16_MAX before the first step.
TSSymbol symbol;
// For non-terminal symbols: the successor state; 0 for terminals.
TSStateId next_state;
uint16_t action_count;
} LookaheadIterator;
// Lookups implemented outside this header.
//
// ts_language_table_entry() fills `*result` for a (state, symbol) pair; the
// inline helpers below read `actions` and `action_count` from it.
void ts_language_table_entry(const TSLanguage *self, TSStateId state, TSSymbol symbol, TableEntry *result);
TSLexerMode ts_language_lex_mode_for_state(const TSLanguage *self, TSStateId state);
bool ts_language_is_reserved_word(const TSLanguage *self, TSStateId state, TSSymbol symbol);
TSSymbolMetadata ts_language_symbol_metadata(const TSLanguage *self, TSSymbol symbol);
TSSymbol ts_language_public_symbol(const TSLanguage *self, TSSymbol symbol);
// Fetch the parse actions for (state, symbol).
//
// Writes the number of actions to `*count` and returns a pointer to the
// first one, both taken from the TableEntry produced by
// ts_language_table_entry().
static inline const TSParseAction *ts_language_actions(
const TSLanguage *self,
TSStateId state,
TSSymbol symbol,
uint32_t *count
) {
TableEntry entry;
ts_language_table_entry(self, state, symbol, &entry);
*count = entry.action_count;
return entry.actions;
}
// True when (state, symbol) has at least one action and the first of them is
// a reduce.
static inline bool ts_language_has_reduce_action(
  const TSLanguage *self,
  TSStateId state,
  TSSymbol symbol
) {
  TableEntry lookup;
  ts_language_table_entry(self, state, symbol, &lookup);
  if (lookup.action_count == 0) return false;
  return lookup.actions[0].type == TSParseActionTypeReduce;
}
// Look up the raw parse-table value for a (state, symbol) pair.
//
// For non-terminal symbols the value is a successor state; for terminal
// symbols it is an index into the actions table. 'Large' states are a dense
// row in `parse_table`, so the lookup is a single index. 'Small' states are
// stored as groups of symbols sharing one value, laid out as
//   group_count, then per group: value, symbol_count, symbols...
// and are searched linearly. A symbol that is not listed yields 0.
static inline uint16_t ts_language_lookup(
  const TSLanguage *self,
  TSStateId state,
  TSSymbol symbol
) {
  if (state < self->large_state_count) {
    return self->parse_table[state * self->symbol_count + symbol];
  }

  uint32_t index = self->small_parse_table_map[state - self->large_state_count];
  const uint16_t *cursor = &self->small_parse_table[index];
  uint16_t group_count = *cursor++;
  for (unsigned group = 0; group < group_count; group++) {
    uint16_t section_value = cursor[0];
    uint16_t symbol_count = cursor[1];
    cursor += 2;
    for (unsigned j = 0; j < symbol_count; j++) {
      if (cursor[j] == symbol) return section_value;
    }
    cursor += symbol_count;
  }
  return 0;
}
// True when the parse table holds a non-zero value for (state, symbol).
static inline bool ts_language_has_actions(
const TSLanguage *self,
TSStateId state,
TSSymbol symbol
) {
return ts_language_lookup(self, state, symbol) != 0;
}
// Iterate over all of the symbols that are valid in the given state.
//
// For 'large' parse states, this just requires iterating through
// all possible symbols and checking the parse table for each one.
// For 'small' parse states, this exploits the structure of the
// table to only visit the valid symbols.
//
// The returned iterator is positioned *before* the first symbol; call
// ts_lookahead_iterator__next() to reach it.
static inline LookaheadIterator ts_language_lookaheads(
const TSLanguage *self,
TSStateId state
) {
bool is_small_state = state >= self->large_state_count;
const uint16_t *data;
const uint16_t *group_end = NULL;
uint16_t group_count = 0;
if (is_small_state) {
uint32_t index = self->small_parse_table_map[state - self->large_state_count];
data = &self->small_parse_table[index];
// `group_end` is set one past the group-count word so that the first
// `data++` in ts_lookahead_iterator__next() hits it and reads the first
// group's header.
group_end = data + 1;
group_count = *data;
} else {
// Start one element before the state's row; the first `data++` in
// ts_lookahead_iterator__next() then lands on the row's first entry.
data = &self->parse_table[state * self->symbol_count] - 1;
}
return (LookaheadIterator) {
.language = self,
.data = data,
.group_end = group_end,
.group_count = group_count,
.is_small_state = is_small_state,
// Paired with the `symbol++` in the large-state loop: assuming TSSymbol is
// 16 bits wide, the first increment wraps this to symbol 0.
.symbol = UINT16_MAX,
.next_state = 0,
};
}
// Advance the iterator to the next symbol that has a non-zero table entry.
//
// Returns false when the state's symbols are exhausted. On success `symbol`
// is updated, and either `actions`/`action_count` (terminal symbols) or
// `next_state` (non-terminal symbols) describe the entry.
static inline bool ts_lookahead_iterator__next(LookaheadIterator *self) {
// For small parse states, valid symbols are listed explicitly,
// grouped by their value. There's no need to look up the actions
// again until moving to the next group.
if (self->is_small_state) {
self->data++;
if (self->data == self->group_end) {
// Current group is finished: read the next group's header
// (value, symbol count), or stop if no groups remain.
if (self->group_count == 0) return false;
self->group_count--;
self->table_value = *(self->data++);
unsigned symbol_count = *(self->data++);
self->group_end = self->data + symbol_count;
self->symbol = *self->data;
} else {
// Same group as before: the previously resolved actions / next_state
// are left in place, only the symbol changes.
self->symbol = *self->data;
return true;
}
}
// For large parse states, iterate through every symbol until one
// is found that has valid actions.
else {
do {
self->data++;
self->symbol++;
if (self->symbol >= self->language->symbol_count) return false;
self->table_value = *self->data;
} while (!self->table_value);
}
// Depending on if the symbol is terminal or non-terminal, the table value either
// represents a list of actions or a successor state.
if (self->symbol < self->language->token_count) {
// The entry at `table_value` is a header carrying the count; the actions
// themselves start at the following element.
const TSParseActionEntry *entry = &self->language->parse_actions[self->table_value];
self->action_count = entry->entry.count;
self->actions = (const TSParseAction *)(entry + 1);
self->next_state = 0;
} else {
self->action_count = 0;
self->next_state = self->table_value;
}
return true;
}
// Report whether `state` is a "primary state". A false result means some
// other state behaves identically to this one with respect to query
// analysis. Languages older than LANGUAGE_VERSION_WITH_PRIMARY_STATES have no
// primary-state table, so every state counts as primary for them.
static inline bool ts_language_state_is_primary(
  const TSLanguage *self,
  TSStateId state
) {
  if (self->abi_version < LANGUAGE_VERSION_WITH_PRIMARY_STATES) return true;
  return self->primary_state_ids[state] == state;
}
// Return the row of enabled-external-token flags for the given external
// scanner state, or NULL for state 0. Each row holds `external_token_count`
// booleans.
static inline const bool *ts_language_enabled_external_tokens(
  const TSLanguage *self,
  unsigned external_scanner_state
) {
  if (external_scanner_state == 0) return NULL;
  return self->external_scanner.states + self->external_token_count * external_scanner_state;
}
// Return the alias sequence for `production_id`: a row of
// `max_alias_sequence_length` symbols inside `alias_sequences`. Production 0
// has no aliases and yields NULL.
static inline const TSSymbol *ts_language_alias_sequence(
  const TSLanguage *self,
  uint32_t production_id
) {
  if (production_id == 0) return NULL;
  return &self->alias_sequences[production_id * self->max_alias_sequence_length];
}
// Return the alias symbol for child `child_index` of production
// `production_id`, or 0 for production 0 (which has no aliases).
static inline TSSymbol ts_language_alias_at(
  const TSLanguage *self,
  uint32_t production_id,
  uint32_t child_index
) {
  if (production_id == 0) return 0;
  return self->alias_sequences[production_id * self->max_alias_sequence_length + child_index];
}
// Produce the half-open range [*start, *end) of field-map entries for
// `production_id`. Both outputs are NULL when the language defines no fields.
static inline void ts_language_field_map(
  const TSLanguage *self,
  uint32_t production_id,
  const TSFieldMapEntry **start,
  const TSFieldMapEntry **end
) {
  if (self->field_count == 0) {
    *start = NULL;
    *end = NULL;
    return;
  }
  TSMapSlice slice = self->field_map_slices[production_id];
  const TSFieldMapEntry *first = &self->field_map_entries[slice.index];
  *start = first;
  *end = first + slice.length;
}
// Produce the half-open range [*start, *end) of symbols that
// `original_symbol` may appear as.
//
// By default the range is the single entry of `public_symbol_map` for the
// symbol. If `alias_map` has a record for the symbol, the range is that
// record's alias list instead. `alias_map` is read as a sequence of records
//   symbol, count, <count symbols>
// terminated by a 0 symbol; the early exit on `symbol > original_symbol`
// indicates the records are expected in ascending symbol order.
static inline void ts_language_aliases_for_symbol(
const TSLanguage *self,
TSSymbol original_symbol,
const TSSymbol **start,
const TSSymbol **end
) {
*start = &self->public_symbol_map[original_symbol];
*end = *start + 1;
unsigned idx = 0;
for (;;) {
TSSymbol symbol = self->alias_map[idx++];
if (symbol == 0 || symbol > original_symbol) break;
uint16_t count = self->alias_map[idx++];
if (symbol == original_symbol) {
*start = &self->alias_map[idx];
*end = &self->alias_map[idx + count];
break;
}
// Not our record: skip over its alias list.
idx += count;
}
}
// Write the name of `symbol` to `f`, escaped for use inside a double-quoted
// DOT string: `"` and `\` are backslash-escaped, newline and tab are written
// as the two-character sequences `\n` and `\t`, everything else is copied
// through. Writes nothing when the symbol has no name.
static inline void ts_language_write_symbol_as_dot_string(
  const TSLanguage *self,
  FILE *f,
  TSSymbol symbol
) {
  const char *name = ts_language_symbol_name(self, symbol);
  // ts_language_symbol_name() returns NULL for out-of-range symbols;
  // dereferencing that below would crash.
  if (!name) return;
  for (const char *chr = name; *chr; chr++) {
    switch (*chr) {
      case '"':
      case '\\':
        fputc('\\', f);
        fputc(*chr, f);
        break;
      case '\n':
        fputs("\\n", f);
        break;
      case '\t':
        fputs("\\t", f);
        break;
      default:
        fputc(*chr, f);
        break;
    }
  }
}
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_LANGUAGE_H_

View File

@ -0,0 +1,52 @@
#ifndef TREE_SITTER_LENGTH_H_
#define TREE_SITTER_LENGTH_H_
#include <stdlib.h>
#include <stdbool.h>
#include "./point.h"
#include "tree_sitter/api.h"
// A span or position in the source text, tracked both as a byte count and as
// a row/column point.
typedef struct {
uint32_t bytes;
TSPoint extent;
} Length;
// Sentinel for "no length": zero bytes paired with a non-zero column, the
// combination that length_is_undefined() tests for.
static const Length LENGTH_UNDEFINED = {0, {0, 1}};
// Largest representable length: every component is UINT32_MAX.
static const Length LENGTH_MAX = {UINT32_MAX, {UINT32_MAX, UINT32_MAX}};
// True for the "undefined" sentinel: a length of zero bytes whose column is
// nevertheless non-zero.
static inline bool length_is_undefined(Length length) {
  if (length.bytes != 0) return false;
  return length.extent.column != 0;
}
// Return whichever length has fewer bytes; `len2` wins ties. Only the byte
// counts are compared.
static inline Length length_min(Length len1, Length len2) {
  if (len1.bytes < len2.bytes) {
    return len1;
  }
  return len2;
}
// Sum two lengths: byte counts are added directly, extents are combined with
// point_add().
static inline Length length_add(Length len1, Length len2) {
  Length sum;
  sum.extent = point_add(len1.extent, len2.extent);
  sum.bytes = len1.bytes + len2.bytes;
  return sum;
}
// Subtract `len2` from `len1`. The byte count clamps at zero instead of
// wrapping; the extent is computed by point_sub().
static inline Length length_sub(Length len1, Length len2) {
  Length difference;
  difference.extent = point_sub(len1.extent, len2.extent);
  if (len1.bytes >= len2.bytes) {
    difference.bytes = len1.bytes - len2.bytes;
  } else {
    difference.bytes = 0;
  }
  return difference;
}
// Return the zero length: 0 bytes, row 0, column 0.
static inline Length length_zero(void) {
Length result = {0, {0, 0}};
return result;
}
// Subtract `len2` from `len1`, returning the all-zero length (bytes and
// extent) whenever `len1` does not have strictly more bytes than `len2`.
static inline Length length_saturating_sub(Length len1, Length len2) {
  if (len1.bytes <= len2.bytes) return length_zero();
  return length_sub(len1, len2);
}
#endif

View File

@ -0,0 +1,483 @@
#include "./length.h"
#include "./lexer.h"
#include "./unicode.h"
#include "tree_sitter/api.h"
#include <stdarg.h>
#include <stdio.h>
// Emit a lexer log line of the form `<message> character:<c>` through the
// lexer's logger, if one is installed. Printable ASCII (32..126) is shown as
// a quoted character, anything else as its numeric value.
//
// Expects a `Lexer *self` in scope. `message` must be a string literal (it is
// concatenated with the format suffix) and `character` is evaluated more than
// once. The macro expands to a complete braced `if` statement, so call sites
// are written without a trailing semicolon.
#define LOG(message, character) \
if (self->logger.log) { \
snprintf( \
self->debug_buffer, \
TREE_SITTER_SERIALIZATION_BUFFER_SIZE, \
32 <= character && character < 127 ? \
message " character:'%c'" : \
message " character:%d", \
character \
); \
self->logger.log( \
self->logger.payload, \
TSLogTypeLex, \
self->debug_buffer \
); \
}
// Code point U+FEFF. ts_lexer_start() skips it when it is the first
// character of the input.
static const int32_t BYTE_ORDER_MARK = 0xFEFF;
// Range covering the whole input, from byte 0 / point (0, 0) up to the
// maximum representable byte and point. Used by
// ts_lexer_set_included_ranges() when the caller supplies no ranges.
static const TSRange DEFAULT_RANGE = {
.start_point = {
.row = 0,
.column = 0,
},
.end_point = {
.row = UINT32_MAX,
.column = UINT32_MAX,
},
.start_byte = 0,
.end_byte = UINT32_MAX
};
/**
 * Stores a known column value and flags the cached column data as valid.
 * @param self The lexer state.
 * @param val The column value to cache.
 */
static void ts_lexer__set_column_data(Lexer *self, uint32_t val) {
  self->column_data.value = val;
  self->column_data.valid = true;
}
/**
 * Bumps the cached column value by one. Does nothing while the cached
 * column data is flagged invalid.
 * @param self The lexer state.
 */
static void ts_lexer__increment_column_data(Lexer *self) {
  if (!self->column_data.valid) return;
  self->column_data.value++;
}
/**
 * Flags the cached column data as invalid and resets its value to zero.
 * @param self The lexer state.
 */
static void ts_lexer__invalidate_column_data(Lexer *self) {
  self->column_data.value = 0;
  self->column_data.valid = false;
}
// Report whether the lexer is at end of input. EOF is encoded by
// `current_included_range_index` being equal to `included_range_count`,
// i.e. every included range has been consumed.
static bool ts_lexer__eof(const TSLexer *_self) {
  const Lexer *lexer = (const Lexer *)_self;
  return lexer->included_range_count == lexer->current_included_range_index;
}
// Forget the cached chunk of source text. Called when the lexer's position
// has moved somewhere the cached chunk no longer covers.
static void ts_lexer__clear_chunk(Lexer *self) {
  self->chunk_start = 0;
  self->chunk_size = 0;
  self->chunk = NULL;
}
// Call the lexer's input callback to obtain a new chunk of source code
// for the current position.
//
// The chunk is recorded as starting at the current byte offset. If the
// callback reports zero bytes, the lexer is put into the EOF state (range
// index set to the range count) and the chunk pointer is cleared.
static void ts_lexer__get_chunk(Lexer *self) {
self->chunk_start = self->current_position.bytes;
self->chunk = self->input.read(
self->input.payload,
self->current_position.bytes,
self->current_position.extent,
&self->chunk_size
);
if (!self->chunk_size) {
self->current_included_range_index = self->included_range_count;
self->chunk = NULL;
}
}
// Decode the next unicode character in the current chunk of source code.
// This assumes that the lexer has already retrieved a chunk of source
// code that spans the current position.
//
// On return `data.lookahead` holds the decoded code point and
// `lookahead_size` the number of bytes it occupies. Undecodable input leaves
// `data.lookahead` as TS_DECODE_ERROR with a size of one byte.
static void ts_lexer__get_lookahead(Lexer *self) {
uint32_t position_in_chunk = self->current_position.bytes - self->chunk_start;
uint32_t size = self->chunk_size - position_in_chunk;
// Nothing left in the chunk: report a NUL lookahead of size 1.
if (size == 0) {
self->lookahead_size = 1;
self->data.lookahead = '\0';
return;
}
const uint8_t *chunk = (const uint8_t *)self->chunk + position_in_chunk;
// Pick the built-in decoder for the declared encoding; any other encoding
// value falls through to the caller-supplied `input.decode`.
DecodeFunction decode =
self->input.encoding == TSInputEncodingUTF8 ? ts_decode_utf8 :
self->input.encoding == TSInputEncodingUTF16LE ? ts_decode_utf16_le :
self->input.encoding == TSInputEncodingUTF16BE ? ts_decode_utf16_be : self->input.decode;
self->lookahead_size = decode(chunk, size, &self->data.lookahead);
// If this chunk ended in the middle of a multi-byte character,
// try again with a fresh chunk.
if (self->data.lookahead == TS_DECODE_ERROR && size < 4) {
ts_lexer__get_chunk(self);
chunk = (const uint8_t *)self->chunk;
size = self->chunk_size;
self->lookahead_size = decode(chunk, size, &self->data.lookahead);
}
// Still invalid: consume a single byte so the lexer keeps making progress.
if (self->data.lookahead == TS_DECODE_ERROR) {
self->lookahead_size = 1;
}
}
// Move the lexer to `position`, snapping forward to the start of the next
// included range if the position falls in a gap, or to the EOF state if it
// lies beyond every range.
//
// The cached column data is invalidated whenever the byte offset changes.
// The lookahead is not decoded here: on success `lookahead_size` is reset to
// 0 so that it is fetched later.
static void ts_lexer_goto(Lexer *self, Length position) {
if (position.bytes != self->current_position.bytes) {
ts_lexer__invalidate_column_data(self);
}
self->current_position = position;
// Move to the first valid position at or after the given position.
bool found_included_range = false;
for (unsigned i = 0; i < self->included_range_count; i++) {
TSRange *included_range = &self->included_ranges[i];
// Take the first non-empty range that ends after the position.
if (
included_range->end_byte > self->current_position.bytes &&
included_range->end_byte > included_range->start_byte
) {
if (included_range->start_byte >= self->current_position.bytes) {
self->current_position = (Length) {
.bytes = included_range->start_byte,
.extent = included_range->start_point,
};
}
self->current_included_range_index = i;
found_included_range = true;
break;
}
}
if (found_included_range) {
// If the current position is outside of the current chunk of text,
// then clear out the current chunk of text.
if (self->chunk && (
self->current_position.bytes < self->chunk_start ||
self->current_position.bytes >= self->chunk_start + self->chunk_size
)) {
ts_lexer__clear_chunk(self);
}
self->lookahead_size = 0;
self->data.lookahead = '\0';
}
// If the given position is beyond any of included ranges, move to the EOF
// state - past the end of the included ranges.
else {
self->current_included_range_index = self->included_range_count;
// Indexes `included_range_count - 1`, so at least one range must be set.
TSRange *last_included_range = &self->included_ranges[self->included_range_count - 1];
self->current_position = (Length) {
.bytes = last_included_range->end_byte,
.extent = last_included_range->end_point,
};
ts_lexer__clear_chunk(self);
self->lookahead_size = 1;
self->data.lookahead = '\0';
}
}
/**
 * Actually advances the lexer. Does not log anything.
 *
 * Consumes the current lookahead (if any), steps over the end of the current
 * included range into the next non-empty one when necessary, and then decodes
 * the next lookahead, fetching a new chunk if the position left the cached
 * one. Reaching the end of the last range puts the lexer in the EOF state.
 *
 * @param self The lexer state.
 * @param skip Whether to mark the consumed codepoint as whitespace.
 */
static void ts_lexer__do_advance(Lexer *self, bool skip) {
if (self->lookahead_size) {
if (self->data.lookahead == '\n') {
self->current_position.extent.row++;
self->current_position.extent.column = 0;
ts_lexer__set_column_data(self, 0);
} else {
// A byte-order mark at byte 0 does not count towards the cached column
// value, but its bytes are still added to `extent.column`.
bool is_bom = self->current_position.bytes == 0 &&
self->data.lookahead == BYTE_ORDER_MARK;
if (!is_bom) ts_lexer__increment_column_data(self);
self->current_position.extent.column += self->lookahead_size;
}
self->current_position.bytes += self->lookahead_size;
}
const TSRange *current_range = &self->included_ranges[self->current_included_range_index];
// Skip to the next included range while the position is at or past the end
// of the current one, or the current one is empty.
while (
self->current_position.bytes >= current_range->end_byte ||
current_range->end_byte == current_range->start_byte
) {
if (self->current_included_range_index < self->included_range_count) {
self->current_included_range_index++;
}
if (self->current_included_range_index < self->included_range_count) {
current_range++;
self->current_position = (Length) {
current_range->start_byte,
current_range->start_point,
};
} else {
// Ran out of ranges: NULL marks EOF for the code below.
current_range = NULL;
break;
}
}
// Skipped characters push the token start forward to the new position.
if (skip) self->token_start_position = self->current_position;
if (current_range) {
if (
self->current_position.bytes < self->chunk_start ||
self->current_position.bytes >= self->chunk_start + self->chunk_size
) {
ts_lexer__get_chunk(self);
}
ts_lexer__get_lookahead(self);
} else {
ts_lexer__clear_chunk(self);
self->data.lookahead = '\0';
self->lookahead_size = 1;
}
}
// Advance to the next character in the source code, retrieving a new
// chunk of source code if needed.
//
// This is the `advance` callback stored on the TSLexer. It logs the
// character being skipped or consumed and then delegates to
// ts_lexer__do_advance(). It is a no-op when no chunk is loaded.
static void ts_lexer__advance(TSLexer *_self, bool skip) {
Lexer *self = (Lexer *)_self;
if (!self->chunk) return;
if (skip) {
LOG("skip", self->data.lookahead)
} else {
LOG("consume", self->data.lookahead)
}
ts_lexer__do_advance(self, skip);
}
// Mark that a token match has completed. This can be called multiple
// times if a longer match is found later.
//
// Records the current position as the token end, except when the lexer sits
// exactly at the start of an included range other than the first: then the
// end of the previous range is recorded instead.
static void ts_lexer__mark_end(TSLexer *_self) {
Lexer *self = (Lexer *)_self;
if (!ts_lexer__eof(&self->data)) {
// If the lexer is right at the beginning of included range,
// then the token should be considered to end at the *end* of the
// previous included range, rather than here.
TSRange *current_included_range = &self->included_ranges[
self->current_included_range_index
];
if (
self->current_included_range_index > 0 &&
self->current_position.bytes == current_included_range->start_byte
) {
TSRange *previous_included_range = current_included_range - 1;
self->token_end_position = (Length) {
previous_included_range->end_byte,
previous_included_range->end_point,
};
return;
}
}
self->token_end_position = self->current_position;
}
// `get_column` callback stored on the TSLexer: return the cached column
// value for the current position.
//
// When the cache is invalid it is rebuilt by rewinding to the start of the
// current line and re-advancing to the original byte offset, letting
// ts_lexer__do_advance() count the characters. Also sets `did_get_column`.
static uint32_t ts_lexer__get_column(TSLexer *_self) {
Lexer *self = (Lexer *)_self;
self->did_get_column = true;
if (!self->column_data.valid) {
// Record current position
uint32_t goal_byte = self->current_position.bytes;
// Back up to the beginning of the line
// (`extent.column` is used as a byte distance from the line start here).
Length start_of_col = {
self->current_position.bytes - self->current_position.extent.column,
{self->current_position.extent.row, 0},
};
ts_lexer_goto(self, start_of_col);
ts_lexer__set_column_data(self, 0);
ts_lexer__get_chunk(self);
if (!ts_lexer__eof(_self)) {
ts_lexer__get_lookahead(self);
// Advance to the recorded position
while (self->current_position.bytes < goal_byte && !ts_lexer__eof(_self) && self->chunk) {
ts_lexer__do_advance(self, false);
if (ts_lexer__eof(_self)) break;
}
}
}
return self->column_data.value;
}
// Report whether the lexer sits exactly at the first byte of its current
// included range, i.e. at a boundary between disjoint ranges of source code.
// Exposed through the TSLexer API because some languages' external scanners
// need to act at these boundaries. Always false once the lexer is at EOF.
static bool ts_lexer__is_at_included_range_start(const TSLexer *_self) {
  const Lexer *self = (const Lexer *)_self;
  if (self->current_included_range_index >= self->included_range_count) {
    return false;
  }
  const TSRange *range = &self->included_ranges[self->current_included_range_index];
  return range->start_byte == self->current_position.bytes;
}
// `log` callback stored on the TSLexer: format a printf-style message into
// the lexer's debug buffer and pass it to the installed logger as a lex-type
// log entry. Does nothing when no logger is installed. Output longer than
// the buffer is truncated by vsnprintf.
static void ts_lexer__log(const TSLexer *_self, const char *fmt, ...) {
Lexer *self = (Lexer *)_self;
va_list args;
va_start(args, fmt);
if (self->logger.log) {
vsnprintf(self->debug_buffer, TREE_SITTER_SERIALIZATION_BUFFER_SIZE, fmt, args);
self->logger.log(self->logger.payload, TSLogTypeLex, self->debug_buffer);
}
va_end(args);
}
// Initialize a lexer: install the TSLexer callbacks, clear all state, and set
// the included ranges to the single default range covering the whole input.
void ts_lexer_init(Lexer *self) {
*self = (Lexer) {
.data = {
// The lexer's methods are stored as struct fields so that generated
// parsers can call them without needing to be linked against this
// library.
.advance = ts_lexer__advance,
.mark_end = ts_lexer__mark_end,
.get_column = ts_lexer__get_column,
.is_at_included_range_start = ts_lexer__is_at_included_range_start,
.eof = ts_lexer__eof,
.log = ts_lexer__log,
.lookahead = 0,
.result_symbol = 0,
},
.chunk = NULL,
.chunk_size = 0,
.chunk_start = 0,
.current_position = {0, {0, 0}},
.logger = {
.payload = NULL,
.log = NULL
},
.included_ranges = NULL,
.included_range_count = 0,
.current_included_range_index = 0,
.did_get_column = false,
.column_data = {
.valid = false,
.value = 0
}
};
// Passing no ranges installs DEFAULT_RANGE and allocates `included_ranges`.
ts_lexer_set_included_ranges(self, NULL, 0);
}
// Free the lexer's included-ranges array. The Lexer struct itself is not
// freed.
void ts_lexer_delete(Lexer *self) {
ts_free(self->included_ranges);
}
// Install a new input source. The cached chunk is dropped (it came from the
// old input) and the lexer is re-positioned at its current position.
void ts_lexer_set_input(Lexer *self, TSInput input) {
self->input = input;
ts_lexer__clear_chunk(self);
ts_lexer_goto(self, self->current_position);
}
// Reposition the lexer at `position`. When the lexer's byte offset already
// equals `position.bytes`, nothing is done.
void ts_lexer_reset(Lexer *self, Length position) {
  if (position.bytes == self->current_position.bytes) return;
  ts_lexer_goto(self, position);
}
// Prepare to lex a new token at the current position: reset the token
// start/end markers, the result symbol and the did-get-column flag, and make
// sure a chunk and a decoded lookahead are available. At byte 0 a leading
// byte-order mark is skipped and the cached column is set to 0.
void ts_lexer_start(Lexer *self) {
self->token_start_position = self->current_position;
self->token_end_position = LENGTH_UNDEFINED;
self->data.result_symbol = 0;
self->did_get_column = false;
if (!ts_lexer__eof(&self->data)) {
if (!self->chunk_size) ts_lexer__get_chunk(self);
if (!self->lookahead_size) ts_lexer__get_lookahead(self);
if (self->current_position.bytes == 0) {
if (self->data.lookahead == BYTE_ORDER_MARK) {
ts_lexer__advance(&self->data, true);
}
ts_lexer__set_column_data(self, 0);
}
}
}
// Finish lexing a token.
//
// If the token end was never marked, it is marked at the current position.
// `*lookahead_end_byte` is raised (never lowered) to the furthest byte the
// lexer may have examined while producing this token.
void ts_lexer_finish(Lexer *self, uint32_t *lookahead_end_byte) {
if (length_is_undefined(self->token_end_position)) {
ts_lexer__mark_end(&self->data);
}
// If the token ended at an included range boundary, then its end position
// will have been reset to the end of the preceding range. Reset the start
// position to match.
if (self->token_end_position.bytes < self->token_start_position.bytes) {
self->token_start_position = self->token_end_position;
}
// The lookahead character at the current position was examined too, hence +1.
uint32_t current_lookahead_end_byte = self->current_position.bytes + 1;
// In order to determine that a byte sequence is invalid UTF8 or UTF16,
// the character decoding algorithm may have looked at the following byte.
// Therefore, the next byte *after* the current (invalid) character
// affects the interpretation of the current character.
if (self->data.lookahead == TS_DECODE_ERROR) {
current_lookahead_end_byte += 4; // the maximum number of bytes read to identify an invalid code point
}
if (current_lookahead_end_byte > *lookahead_end_byte) {
*lookahead_end_byte = current_lookahead_end_byte;
}
}
// Non-static entry point for marking the token end; forwards to the
// `mark_end` callback implementation.
void ts_lexer_mark_end(Lexer *self) {
ts_lexer__mark_end(&self->data);
}
// Replace the lexer's included ranges with a copy of `ranges`.
//
// Passing `count == 0` or `ranges == NULL` selects the single default range
// covering the whole input. Otherwise the ranges must be in ascending order
// without overlap and each must have `end_byte >= start_byte`; if not, the
// function returns false and leaves the lexer unchanged. On success the
// lexer is re-positioned against the new ranges and true is returned.
bool ts_lexer_set_included_ranges(
Lexer *self,
const TSRange *ranges,
uint32_t count
) {
if (count == 0 || !ranges) {
ranges = &DEFAULT_RANGE;
count = 1;
} else {
// Validate before touching any state.
uint32_t previous_byte = 0;
for (unsigned i = 0; i < count; i++) {
const TSRange *range = &ranges[i];
if (
range->start_byte < previous_byte ||
range->end_byte < range->start_byte
) return false;
previous_byte = range->end_byte;
}
}
size_t size = count * sizeof(TSRange);
self->included_ranges = ts_realloc(self->included_ranges, size);
memcpy(self->included_ranges, ranges, size);
self->included_range_count = count;
ts_lexer_goto(self, self->current_position);
return true;
}
// Return the lexer's own included-ranges array (not a copy) and write its
// length to `*count`.
TSRange *ts_lexer_included_ranges(const Lexer *self, uint32_t *count) {
*count = self->included_range_count;
return self->included_ranges;
}
#undef LOG

View File

@ -0,0 +1,54 @@
#ifndef TREE_SITTER_LEXER_H_
#define TREE_SITTER_LEXER_H_
#ifdef __cplusplus
extern "C" {
#endif
#include "./length.h"
#include "./subtree.h"
#include "tree_sitter/api.h"
#include "./parser.h"
// Cached column value for the lexer's current position. `value` is only
// meaningful while `valid` is true.
typedef struct {
uint32_t value;
bool valid;
} ColumnData;
// Full lexer state. The public TSLexer is the first member, so a
// `TSLexer *` handed to callbacks can be cast back to `Lexer *`.
typedef struct {
TSLexer data;
// Where the lexer currently is in the input.
Length current_position;
// Start and end of the token being lexed.
Length token_start_position;
Length token_end_position;
// Heap-allocated array of `included_range_count` ranges to lex within.
TSRange *included_ranges;
// Most recent chunk returned by the input callback (NULL when none).
const char *chunk;
TSInput input;
TSLogger logger;
uint32_t included_range_count;
// Index of the range containing the current position; equal to
// `included_range_count` at EOF.
uint32_t current_included_range_index;
// Byte offset of `chunk` in the input, and its length in bytes.
uint32_t chunk_start;
uint32_t chunk_size;
// Size in bytes of the decoded lookahead character; 0 when it still needs
// to be decoded.
uint32_t lookahead_size;
// Set when the `get_column` callback has been called for the current token.
bool did_get_column;
ColumnData column_data;
// Scratch buffer for formatting log messages.
char debug_buffer[TREE_SITTER_SERIALIZATION_BUFFER_SIZE];
} Lexer;
// Lifecycle: initialize a lexer / free its included-ranges array.
void ts_lexer_init(Lexer *self);
void ts_lexer_delete(Lexer *self);
// Install a new input source and drop the cached chunk.
void ts_lexer_set_input(Lexer *self, TSInput input);
// Move to `position`; no-op if the byte offset is unchanged.
void ts_lexer_reset(Lexer *self, Length position);
// Begin / end lexing one token. `ts_lexer_finish` raises
// `*lookahead_end_byte` to the furthest byte examined.
void ts_lexer_start(Lexer *self);
void ts_lexer_finish(Lexer *self, uint32_t *lookahead_end_byte);
void ts_lexer_mark_end(Lexer *self);
// Replace the included ranges; returns false if they are out of order.
bool ts_lexer_set_included_ranges(Lexer *self, const TSRange *ranges, uint32_t count);
// Return the lexer's own range array and write its length to `*count`.
TSRange *ts_lexer_included_ranges(const Lexer *self, uint32_t *count);
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_LEXER_H_

View File

@ -0,0 +1,12 @@
#include "./alloc.c"
#include "./get_changed_ranges.c"
#include "./language.c"
#include "./lexer.c"
#include "./node.c"
#include "./parser.c"
#include "./query.c"
#include "./stack.c"
#include "./subtree.c"
#include "./tree_cursor.c"
#include "./tree.c"
#include "./wasm_store.c"

View File

@ -0,0 +1,875 @@
#include <stdbool.h>
#include "./point.h"
#include "./subtree.h"
#include "./tree.h"
#include "./language.h"
// Cursor over the direct children of one subtree, tracking each child's
// position in the source as it goes.
typedef struct {
// Subtree whose children are being visited.
Subtree parent;
const TSTree *tree;
// Running position; see ts_node_child_iterator_next() for how it advances.
Length position;
// Index of the next child to return.
uint32_t child_index;
// Number of non-extra children returned so far; indexes `alias_sequence`.
uint32_t structural_child_index;
// Aliases for the parent's production, or NULL if there are none.
const TSSymbol *alias_sequence;
} NodeChildIterator;
static inline bool ts_node__is_relevant(TSNode self, bool include_anonymous);
// TSNode - constructors
// Build a TSNode handle for `subtree` inside `tree`.
//
// The node's four context words are, in order: start byte, start row, start
// column, and alias symbol. The subtree pointer becomes the node's id.
TSNode ts_node_new(
const TSTree *tree,
const Subtree *subtree,
Length position,
TSSymbol alias
) {
return (TSNode) {
{position.bytes, position.extent.row, position.extent.column, alias},
subtree,
tree,
};
}
// Return the null node: no tree, no subtree, zero position, no alias.
static inline TSNode ts_node__null(void) {
return ts_node_new(NULL, NULL, length_zero(), 0);
}
// TSNode - accessors
// Return the node's start byte (context word 0).
uint32_t ts_node_start_byte(TSNode self) {
return self.context[0];
}
// Return the node's start point: row from context word 1, column from
// context word 2.
TSPoint ts_node_start_point(TSNode self) {
return (TSPoint) {self.context[1], self.context[2]};
}
// Return the node's alias symbol (context word 3); 0 means no alias.
static inline uint32_t ts_node__alias(const TSNode *self) {
return self->context[3];
}
// Return the subtree the node refers to. The node's `id` is a pointer to a
// Subtree, so this must not be called on a null node.
static inline Subtree ts_node__subtree(TSNode self) {
return *(const Subtree *)self.id;
}
// NodeChildIterator
// Create an iterator over the direct children of `node`, starting at the
// node's own start position.
//
// For a node without children the iterator's parent is NULL_SUBTREE
// (presumably a subtree with a null pointer, which makes
// ts_node_child_iterator_next() return false immediately).
static inline NodeChildIterator ts_node_iterate_children(const TSNode *node) {
Subtree subtree = ts_node__subtree(*node);
if (ts_subtree_child_count(subtree) == 0) {
return (NodeChildIterator) {NULL_SUBTREE, node->tree, length_zero(), 0, 0, NULL};
}
const TSSymbol *alias_sequence = ts_language_alias_sequence(
node->tree->language,
subtree.ptr->production_id
);
return (NodeChildIterator) {
.tree = node->tree,
.parent = subtree,
.position = {ts_node_start_byte(*node), ts_node_start_point(*node)},
.child_index = 0,
.structural_child_index = 0,
.alias_sequence = alias_sequence,
};
}
// True once every child of the parent has been returned. Dereferences
// `parent.ptr`, so the caller must have checked that it is non-null.
static inline bool ts_node_child_iterator_done(NodeChildIterator *self) {
return self->child_index == self->parent.ptr->child_count;
}
// Produce the next child of the iterator's parent in `*result`.
//
// Returns false (leaving `*result` untouched) when the parent is null or all
// children have been visited. After a successful call `self->position` is the
// END of the returned child, which callers use to test byte ranges.
static inline bool ts_node_child_iterator_next(
NodeChildIterator *self,
TSNode *result
) {
if (!self->parent.ptr || ts_node_child_iterator_done(self)) return false;
const Subtree *child = &ts_subtree_children(self->parent)[self->child_index];
TSSymbol alias_symbol = 0;
// Extra children do not occupy a slot in the production, so they neither
// receive an alias nor advance the structural index.
if (!ts_subtree_extra(*child)) {
if (self->alias_sequence) {
alias_symbol = self->alias_sequence[self->structural_child_index];
}
self->structural_child_index++;
}
// The first child's padding is not added: the iterator starts at the
// parent node's start position (presumably already past that padding).
if (self->child_index > 0) {
self->position = length_add(self->position, ts_subtree_padding(*child));
}
*result = ts_node_new(
self->tree,
child,
self->position,
alias_symbol
);
self->position = length_add(self->position, ts_subtree_size(*child));
self->child_index++;
return true;
}
// TSNode - private
// Decide whether a node counts as a child for the public API.
//
// With `include_anonymous`, a node is relevant when its subtree is visible or
// it carries an alias. Without it, an aliased node is relevant when the alias
// symbol is named, and an unaliased node when its subtree is both visible and
// named.
static inline bool ts_node__is_relevant(TSNode self, bool include_anonymous) {
  Subtree subtree = ts_node__subtree(self);
  if (include_anonymous) {
    return ts_subtree_visible(subtree) || ts_node__alias(&self);
  }

  TSSymbol alias_symbol = ts_node__alias(&self);
  if (!alias_symbol) {
    return ts_subtree_visible(subtree) && ts_subtree_named(subtree);
  }
  return ts_language_symbol_metadata(self.tree->language, alias_symbol).named;
}
// Return the node's precomputed count of relevant children: the visible
// child count when anonymous nodes are included, otherwise the named child
// count. Nodes without children report 0.
static inline uint32_t ts_node__relevant_child_count(
  TSNode self,
  bool include_anonymous
) {
  Subtree subtree = ts_node__subtree(self);
  if (ts_subtree_child_count(subtree) == 0) return 0;
  if (include_anonymous) return subtree.ptr->visible_child_count;
  return subtree.ptr->named_child_count;
}
// Return the `child_index`-th relevant child of `self`, or the null node if
// there are not that many.
//
// Children that are not relevant themselves are looked through: their
// relevant children count as if they were direct children. The search
// descends iteratively rather than recursively.
static inline TSNode ts_node__child(
TSNode self,
uint32_t child_index,
bool include_anonymous
) {
TSNode result = self;
bool did_descend = true;
while (did_descend) {
did_descend = false;
TSNode child;
// Number of relevant children seen so far at this level.
uint32_t index = 0;
NodeChildIterator iterator = ts_node_iterate_children(&result);
while (ts_node_child_iterator_next(&iterator, &child)) {
if (ts_node__is_relevant(child, include_anonymous)) {
if (index == child_index) {
return child;
}
index++;
} else {
// Irrelevant child: if the target falls among its relevant children,
// restart the search inside it with a rebased index.
uint32_t grandchild_index = child_index - index;
uint32_t grandchild_count = ts_node__relevant_child_count(child, include_anonymous);
if (grandchild_index < grandchild_count) {
did_descend = true;
result = child;
child_index = grandchild_index;
break;
}
index += grandchild_count;
}
}
}
return ts_node__null();
}
// Report whether `other` is found among the trailing zero-byte descendants
// of `self`.
//
// Children are scanned from last to first; the scan stops at the first child
// that occupies any bytes. Each empty child is compared by pointer and then
// searched recursively.
static bool ts_subtree_has_trailing_empty_descendant(
Subtree self,
Subtree other
) {
// `i + 1 > 0` ends the loop when the unsigned index wraps below zero; it
// also makes the loop a no-op when the child count is 0.
for (unsigned i = ts_subtree_child_count(self) - 1; i + 1 > 0; i--) {
Subtree child = ts_subtree_children(self)[i];
if (ts_subtree_total_bytes(child) > 0) break;
if (child.ptr == other.ptr || ts_subtree_has_trailing_empty_descendant(child, other)) {
return true;
}
}
return false;
}
// Find the closest relevant node that precedes `self`, or the null node if
// there is none.
//
// The search starts at the node returned by ts_node_parent() and walks its
// children in order. At each level it remembers the last candidate seen
// before the child that contains the target, then either descends into that
// containing child, returns the candidate, or descends into an irrelevant
// candidate that has relevant children of its own.
static inline TSNode ts_node__prev_sibling(TSNode self, bool include_anonymous) {
Subtree self_subtree = ts_node__subtree(self);
bool self_is_empty = ts_subtree_total_bytes(self_subtree) == 0;
uint32_t target_end_byte = ts_node_end_byte(self);
TSNode node = ts_node_parent(self);
// Best candidate remembered from an outer level, used as a fallback when
// the inner levels yield nothing.
TSNode earlier_node = ts_node__null();
bool earlier_node_is_relevant = false;
while (!ts_node_is_null(node)) {
TSNode earlier_child = ts_node__null();
bool earlier_child_is_relevant = false;
bool found_child_containing_target = false;
TSNode child;
NodeChildIterator iterator = ts_node_iterate_children(&node);
while (ts_node_child_iterator_next(&iterator, &child)) {
if (child.id == self.id) break;
// `iterator.position` is the end of `child` here.
if (iterator.position.bytes > target_end_byte) {
found_child_containing_target = true;
break;
}
// A child ending exactly at the target's end contains it unless the
// target is empty; an empty target is only contained if it is one of the
// child's trailing empty descendants.
if (iterator.position.bytes == target_end_byte &&
(!self_is_empty ||
ts_subtree_has_trailing_empty_descendant(ts_node__subtree(child), self_subtree))) {
found_child_containing_target = true;
break;
}
if (ts_node__is_relevant(child, include_anonymous)) {
earlier_child = child;
earlier_child_is_relevant = true;
} else if (ts_node__relevant_child_count(child, include_anonymous) > 0) {
earlier_child = child;
earlier_child_is_relevant = false;
}
}
if (found_child_containing_target) {
if (!ts_node_is_null(earlier_child)) {
earlier_node = earlier_child;
earlier_node_is_relevant = earlier_child_is_relevant;
}
node = child;
} else if (earlier_child_is_relevant) {
return earlier_child;
} else if (!ts_node_is_null(earlier_child)) {
node = earlier_child;
} else if (earlier_node_is_relevant) {
return earlier_node;
} else {
// Fall back to the remembered outer candidate (possibly null, which ends
// the loop) and clear it so it is not revisited.
node = earlier_node;
earlier_node = ts_node__null();
earlier_node_is_relevant = false;
}
}
return ts_node__null();
}
// Find the closest node following `self` that ts_node__is_relevant() accepts
// for the given `include_anonymous` flag (the public wrappers pass `true` for
// any sibling and `false` for named siblings). Returns a null node if no such
// node is found.
//
// The search starts at ts_node_parent(self) and repeatedly scans one node's
// children, either descending toward `self` or descending into a later
// non-relevant node whose relevant children may contain the answer.
static inline TSNode ts_node__next_sibling(TSNode self, bool include_anonymous) {
  uint32_t target_end_byte = ts_node_end_byte(self);

  TSNode node = ts_node_parent(self);
  // Fallback candidate remembered from an outer level while descending
  // toward `self`.
  TSNode later_node = ts_node__null();
  bool later_node_is_relevant = false;

  while (!ts_node_is_null(node)) {
    TSNode later_child = ts_node__null();
    bool later_child_is_relevant = false;
    TSNode child_containing_target = ts_node__null();

    TSNode child;
    NodeChildIterator iterator = ts_node_iterate_children(&node);
    while (ts_node_child_iterator_next(&iterator, &child)) {
      // Children that end at or before the end of `self` cannot follow it.
      if (iterator.position.bytes <= target_end_byte) continue;
      uint32_t start_byte = ts_node_start_byte(self);
      uint32_t child_start_byte = ts_node_start_byte(child);

      // An empty `self` is only contained by a child that starts strictly
      // before it; a non-empty `self` may share its start with the child.
      bool is_empty = start_byte == target_end_byte;
      bool contains_target = is_empty ?
        child_start_byte < start_byte :
        child_start_byte <= start_byte;

      if (contains_target) {
        // Do not descend into another node that wraps the very same subtree.
        if (ts_node__subtree(child).ptr != ts_node__subtree(self).ptr) {
          child_containing_target = child;
        }
      } else if (ts_node__is_relevant(child, include_anonymous)) {
        later_child = child;
        later_child_is_relevant = true;
        break;
      } else if (ts_node__relevant_child_count(child, include_anonymous) > 0) {
        later_child = child;
        later_child_is_relevant = false;
        break;
      }
    }

    if (!ts_node_is_null(child_containing_target)) {
      // Keep the first later candidate as a fallback, then descend toward `self`.
      if (!ts_node_is_null(later_child)) {
        later_node = later_child;
        later_node_is_relevant = later_child_is_relevant;
      }
      node = child_containing_target;
    } else if (later_child_is_relevant) {
      return later_child;
    } else if (!ts_node_is_null(later_child)) {
      // A non-relevant node with relevant children: search inside it.
      node = later_child;
    } else if (later_node_is_relevant) {
      return later_node;
    } else {
      // Consume the non-relevant fallback (or end the loop if it is null).
      // Clear it afterwards, as ts_node__prev_sibling does, so that a fallback
      // which yields no candidate is not revisited forever.
      node = later_node;
      later_node = ts_node__null();
      later_node_is_relevant = false;
    }
  }

  return ts_node__null();
}
// Return the first node under `self` whose end byte is greater than `goal`
// and that ts_node__is_relevant() accepts for `include_anonymous`, looking
// through non-relevant children that have visible children of their own.
// Returns a null node if nothing qualifies.
static inline TSNode ts_node__first_child_for_byte(
TSNode self,
uint32_t goal,
bool include_anonymous
) {
TSNode node = self;
bool did_descend = true;
// Iterator position saved before descending into a non-relevant child, so
// the scan of the outer level can be resumed if the descent finds nothing.
NodeChildIterator last_iterator;
bool has_last_iterator = false;
while (did_descend) {
did_descend = false;
TSNode child;
NodeChildIterator iterator = ts_node_iterate_children(&node);
loop:
while (ts_node_child_iterator_next(&iterator, &child)) {
if (ts_node_end_byte(child) > goal) {
if (ts_node__is_relevant(child, include_anonymous)) {
return child;
} else if (ts_node_child_count(child) > 0) {
// NOTE(review): this compares the current iterator's child index with the
// child count of `child`'s own subtree before saving the iterator — confirm
// that this is the intended condition.
if (iterator.child_index < ts_subtree_child_count(ts_node__subtree(child))) {
last_iterator = iterator;
has_last_iterator = true;
}
did_descend = true;
node = child;
break;
}
}
}
// Nothing found at this level: resume the saved outer scan once.
if (!did_descend && has_last_iterator) {
iterator = last_iterator;
has_last_iterator = false;
goto loop;
}
}
return ts_node__null();
}
// Walk down from `self`, one level at a time, into the child that spans the
// byte range [range_start, range_end]. The deepest node on that path accepted
// by ts_node__is_relevant() is returned; `self` is returned when no child
// qualifies, and a null node when the range is inverted.
static inline TSNode ts_node__descendant_for_byte_range(
  TSNode self,
  uint32_t range_start,
  uint32_t range_end,
  bool include_anonymous
) {
  if (range_start > range_end) return ts_node__null();

  TSNode current = self;
  TSNode best_match = self;
  bool descended;

  do {
    descended = false;

    TSNode candidate;
    NodeChildIterator cursor = ts_node_iterate_children(&current);
    while (ts_node_child_iterator_next(&cursor, &candidate)) {
      uint32_t candidate_end = cursor.position.bytes;

      // The candidate has to reach at least as far as the end of the range.
      if (candidate_end < range_end) continue;

      // It also has to end past the start of the range; an empty candidate
      // only needs to end at the start of the range.
      uint32_t candidate_start = ts_node_start_byte(candidate);
      bool candidate_is_empty = candidate_start == candidate_end;
      bool ends_too_early = candidate_is_empty
        ? candidate_end < range_start
        : candidate_end <= range_start;
      if (ends_too_early) continue;

      // A candidate that begins after the start of the range cannot span it,
      // and neither can any candidate after it.
      if (range_start < candidate_start) break;

      current = candidate;
      if (ts_node__is_relevant(current, include_anonymous)) {
        best_match = current;
      }
      descended = true;
      break;
    }
  } while (descended);

  return best_match;
}
// Walk down from `self`, one level at a time, into the child that spans the
// point range [range_start, range_end]. The deepest node on that path accepted
// by ts_node__is_relevant() is returned; `self` is returned when no child
// qualifies, and a null node when the range is inverted.
static inline TSNode ts_node__descendant_for_point_range(
  TSNode self,
  TSPoint range_start,
  TSPoint range_end,
  bool include_anonymous
) {
  if (point_gt(range_start, range_end)) return ts_node__null();

  TSNode current = self;
  TSNode best_match = self;
  bool descended;

  do {
    descended = false;

    TSNode candidate;
    NodeChildIterator cursor = ts_node_iterate_children(&current);
    while (ts_node_child_iterator_next(&cursor, &candidate)) {
      TSPoint candidate_end = cursor.position.extent;

      // The candidate has to reach at least as far as the end of the range.
      if (point_lt(candidate_end, range_end)) continue;

      // It also has to end past the start of the range; an empty candidate
      // only needs to end at the start of the range.
      bool candidate_is_empty = point_eq(ts_node_start_point(candidate), candidate_end);
      bool ends_too_early = candidate_is_empty
        ? point_lt(candidate_end, range_start)
        : point_lte(candidate_end, range_start);
      if (ends_too_early) continue;

      // A candidate that begins after the start of the range cannot span it,
      // and neither can any candidate after it.
      if (point_lt(range_start, ts_node_start_point(candidate))) break;

      current = candidate;
      if (ts_node__is_relevant(current, include_anonymous)) {
        best_match = current;
      }
      descended = true;
      break;
    }
  } while (descended);

  return best_match;
}
// TSNode - public
// End byte of the node: its start byte plus the byte size of its subtree.
uint32_t ts_node_end_byte(TSNode self) {
  uint32_t first_byte = ts_node_start_byte(self);
  uint32_t byte_length = ts_subtree_size(ts_node__subtree(self)).bytes;
  return first_byte + byte_length;
}
// End point of the node: its start point advanced by the extent of its subtree.
TSPoint ts_node_end_point(TSNode self) {
  TSPoint first_point = ts_node_start_point(self);
  TSPoint subtree_extent = ts_subtree_size(ts_node__subtree(self)).extent;
  return point_add(first_point, subtree_extent);
}
// Public symbol of the node: the alias symbol when there is one, otherwise
// the symbol of the underlying subtree, mapped through
// ts_language_public_symbol().
TSSymbol ts_node_symbol(TSNode self) {
  TSSymbol effective_symbol = ts_node__alias(&self);
  if (effective_symbol == 0) {
    effective_symbol = ts_subtree_symbol(ts_node__subtree(self));
  }
  return ts_language_public_symbol(self.tree->language, effective_symbol);
}
// Name of the node's type: looked up from the alias symbol when there is one,
// otherwise from the symbol of the underlying subtree.
const char *ts_node_type(TSNode self) {
  TSSymbol effective_symbol = ts_node__alias(&self);
  if (effective_symbol == 0) {
    effective_symbol = ts_subtree_symbol(ts_node__subtree(self));
  }
  return ts_language_symbol_name(self.tree->language, effective_symbol);
}
// Language of the tree this node belongs to.
const TSLanguage *ts_node_language(TSNode self) {
  const TSLanguage *tree_language = self.tree->language;
  return tree_language;
}
// Symbol of the underlying subtree, ignoring any alias.
TSSymbol ts_node_grammar_symbol(TSNode self) {
  Subtree subtree = ts_node__subtree(self);
  return ts_subtree_symbol(subtree);
}
// Name of the underlying subtree's symbol, ignoring any alias.
const char *ts_node_grammar_type(TSNode self) {
  Subtree subtree = ts_node__subtree(self);
  return ts_language_symbol_name(self.tree->language, ts_subtree_symbol(subtree));
}
// Build a string for the node via ts_subtree_string(), passing along the
// node's alias symbol and whether that alias is visible.
char *ts_node_string(TSNode self) {
  const TSLanguage *language = self.tree->language;
  TSSymbol alias = ts_node__alias(&self);
  bool alias_is_visible = ts_language_symbol_metadata(language, alias).visible;
  return ts_subtree_string(ts_node__subtree(self), alias, alias_is_visible, language, false);
}
// Two nodes are equal when they belong to the same tree and share an id.
bool ts_node_eq(TSNode self, TSNode other) {
  if (self.tree != other.tree) return false;
  return self.id == other.id;
}
// A node is null when its id is zero.
bool ts_node_is_null(TSNode self) {
  return !self.id;
}
// Whether the underlying subtree is flagged as extra.
bool ts_node_is_extra(TSNode self) {
  Subtree subtree = ts_node__subtree(self);
  return ts_subtree_extra(subtree);
}
// Whether the node is named. An aliased node takes the `named` flag from the
// alias symbol's metadata; otherwise the subtree's own flag is used.
bool ts_node_is_named(TSNode self) {
  TSSymbol alias = ts_node__alias(&self);
  if (alias) {
    return ts_language_symbol_metadata(self.tree->language, alias).named;
  }
  return ts_subtree_named(ts_node__subtree(self));
}
// Whether the underlying subtree is flagged as missing.
bool ts_node_is_missing(TSNode self) {
  Subtree subtree = ts_node__subtree(self);
  return ts_subtree_missing(subtree);
}
// Whether the underlying subtree is flagged as having changes.
bool ts_node_has_changes(TSNode self) {
  Subtree subtree = ts_node__subtree(self);
  return ts_subtree_has_changes(subtree);
}
// Whether the underlying subtree carries a positive error cost.
bool ts_node_has_error(TSNode self) {
  Subtree subtree = ts_node__subtree(self);
  return ts_subtree_error_cost(subtree) > 0;
}
// Whether the node's public symbol is the built-in error symbol.
bool ts_node_is_error(TSNode self) {
  return ts_node_symbol(self) == ts_builtin_sym_error;
}
// Visible descendant count of the underlying subtree, plus one.
uint32_t ts_node_descendant_count(TSNode self) {
  Subtree subtree = ts_node__subtree(self);
  return ts_subtree_visible_descendant_count(subtree) + 1;
}
// Parse state recorded on the underlying subtree.
TSStateId ts_node_parse_state(TSNode self) {
  Subtree subtree = ts_node__subtree(self);
  return ts_subtree_parse_state(subtree);
}
// State obtained from ts_language_next_state() for the node's parse state and
// grammar symbol. TS_TREE_STATE_NONE is passed through unchanged.
TSStateId ts_node_next_parse_state(TSNode self) {
  uint16_t current_state = ts_node_parse_state(self);
  if (current_state == TS_TREE_STATE_NONE) return TS_TREE_STATE_NONE;

  const TSLanguage *language = self.tree->language;
  uint16_t grammar_symbol = ts_node_grammar_symbol(self);
  return ts_language_next_state(language, current_state, grammar_symbol);
}
// Find the parent of `self` by walking down from the tree's root with
// ts_node_child_with_descendant() until the next step would be `self` itself
// or a null node. The root node has no parent, so a null node is returned
// for it.
TSNode ts_node_parent(TSNode self) {
  TSNode ancestor = ts_tree_root_node(self.tree);
  if (ancestor.id == self.id) return ts_node__null();

  for (;;) {
    TSNode step = ts_node_child_with_descendant(ancestor, self);
    if (step.id == self.id || ts_node_is_null(step)) return ancestor;
    ancestor = step;
  }
}
// Return the node below `self` that leads toward `descendant`: either
// `descendant` itself or a node accepted by ts_node__is_relevant(node, true)
// on the way to it. Returns a null node when iteration runs out or passes the
// descendant's start byte without finding such a node.
// Note that `self` is reused as the iteration variable throughout.
TSNode ts_node_child_with_descendant(TSNode self, TSNode descendant) {
uint32_t start_byte = ts_node_start_byte(descendant);
uint32_t end_byte = ts_node_end_byte(descendant);
bool is_empty = start_byte == end_byte;
// Outer loop: keep descending while the node found is not relevant.
do {
NodeChildIterator iter = ts_node_iterate_children(&self);
// Inner loop: advance through the children until one reaches far enough to
// hold the descendant and has children of its own.
do {
if (
!ts_node_child_iterator_next(&iter, &self)
|| ts_node_start_byte(self) > start_byte
) {
return ts_node__null();
}
if (self.id == descendant.id) {
return self;
}
// If the descendant is empty, and the end byte is within `self`,
// we check whether `self` contains it or not.
if (is_empty && iter.position.bytes >= end_byte && ts_node_child_count(self) > 0) {
TSNode child = ts_node_child_with_descendant(self, descendant);
// If the child is not null, return self if it's relevant, else return the child
if (!ts_node_is_null(child)) {
return ts_node__is_relevant(self, true) ? self : child;
}
}
} while ((is_empty ? iter.position.bytes <= end_byte : iter.position.bytes < end_byte) || ts_node_child_count(self) == 0);
} while (!ts_node__is_relevant(self, true));
return self;
}
// Child at `child_index`, counting anonymous nodes as well.
TSNode ts_node_child(TSNode self, uint32_t child_index) {
  const bool include_anonymous = true;
  return ts_node__child(self, child_index, include_anonymous);
}
// Child at `child_index`, with anonymous nodes excluded from the lookup.
TSNode ts_node_named_child(TSNode self, uint32_t child_index) {
  const bool include_anonymous = false;
  return ts_node__child(self, child_index, include_anonymous);
}
// Return the first child of `self` associated with `field_id`, or a null node
// if `field_id` is zero, `self` has no children, or no matching child exists.
// The field map for the node's production is narrowed to the entries for
// `field_id`, then the children are scanned against those entries.
TSNode ts_node_child_by_field_id(TSNode self, TSFieldId field_id) {
recur:
if (!field_id || ts_node_child_count(self) == 0) return ts_node__null();
const TSFieldMapEntry *field_map, *field_map_end;
ts_language_field_map(
self.tree->language,
ts_node__subtree(self).ptr->production_id,
&field_map,
&field_map_end
);
if (field_map == field_map_end) return ts_node__null();
// The field mappings are sorted by their field id. Scan all
// the mappings to find the ones for the given field id.
while (field_map->field_id < field_id) {
field_map++;
if (field_map == field_map_end) return ts_node__null();
}
while (field_map_end[-1].field_id > field_id) {
field_map_end--;
if (field_map == field_map_end) return ts_node__null();
}
// [field_map, field_map_end) now covers only the entries for `field_id`.
TSNode child;
NodeChildIterator iterator = ts_node_iterate_children(&self);
while (ts_node_child_iterator_next(&iterator, &child)) {
// Extra children are skipped when matching against the field map.
if (!ts_subtree_extra(ts_node__subtree(child))) {
// structural_child_index has already been advanced past this child.
uint32_t index = iterator.structural_child_index - 1;
if (index < field_map->child_index) continue;
// Hidden nodes' fields are "inherited" by their visible parent.
if (field_map->inherited) {
// If this is the *last* possible child node for this field,
// then perform a tail call to avoid recursion.
if (field_map + 1 == field_map_end) {
self = child;
goto recur;
}
// Otherwise, descend into this child, but if it doesn't contain
// the field, continue searching subsequent children.
else {
TSNode result = ts_node_child_by_field_id(child, field_id);
if (result.id) return result;
field_map++;
if (field_map == field_map_end) return ts_node__null();
}
}
else if (ts_node__is_relevant(child, true)) {
return child;
}
// If the field refers to a hidden node with visible children,
// return the first visible child.
else if (ts_node_child_count(child) > 0 ) {
return ts_node_child(child, 0);
}
// Otherwise, continue searching subsequent children.
else {
field_map++;
if (field_map == field_map_end) return ts_node__null();
}
}
}
return ts_node__null();
}
// Look up the field name attached directly (not inherited) to the child at
// `structural_child_index` in the field map of `self`'s production.
// Returns NULL when no entry matches.
static inline const char *ts_node__field_name_from_language(TSNode self, uint32_t structural_child_index) {
  const TSFieldMapEntry *entry, *entries_end;
  ts_language_field_map(
    self.tree->language,
    ts_node__subtree(self).ptr->production_id,
    &entry,
    &entries_end
  );

  while (entry != entries_end) {
    bool is_direct_match = !entry->inherited && entry->child_index == structural_child_index;
    if (is_direct_match) {
      return self.tree->language->field_names[entry->field_id];
    }
    entry++;
  }
  return NULL;
}
// Return the field name of the child of `self` at `child_index`, where
// children are counted with ts_node__is_relevant(child, true). Returns NULL
// if that child is an extra, has no field name, or the index is not found.
const char *ts_node_field_name_for_child(TSNode self, uint32_t child_index) {
TSNode result = self;
bool did_descend = true;
// Field name picked up from a non-relevant ancestor while descending.
const char *inherited_field_name = NULL;
while (did_descend) {
did_descend = false;
TSNode child;
// Number of relevant children passed so far at this level.
uint32_t index = 0;
NodeChildIterator iterator = ts_node_iterate_children(&result);
while (ts_node_child_iterator_next(&iterator, &child)) {
if (ts_node__is_relevant(child, true)) {
if (index == child_index) {
if (ts_node_is_extra(child)) {
return NULL;
}
// Prefer a name attached directly at this level; otherwise fall back to a
// name recorded while descending.
const char *field_name = ts_node__field_name_from_language(result, iterator.structural_child_index - 1);
if (field_name) return field_name;
return inherited_field_name;
}
index++;
} else {
// A non-relevant child contributes its relevant children to the count.
uint32_t grandchild_index = child_index - index;
uint32_t grandchild_count = ts_node__relevant_child_count(child, true);
if (grandchild_index < grandchild_count) {
// The target lies inside this child: remember its field name, if any,
// and continue the search within it using the rebased index.
const char *field_name = ts_node__field_name_from_language(result, iterator.structural_child_index - 1);
if (field_name) inherited_field_name = field_name;
did_descend = true;
result = child;
child_index = grandchild_index;
break;
}
index += grandchild_count;
}
}
}
return NULL;
}
// Return the field name of the child of `self` at `named_child_index`, where
// children are counted with ts_node__is_relevant(child, false). Returns NULL
// if that child is an extra, has no field name, or the index is not found.
const char *ts_node_field_name_for_named_child(TSNode self, uint32_t named_child_index) {
TSNode result = self;
bool did_descend = true;
// Field name picked up from a non-relevant ancestor while descending.
const char *inherited_field_name = NULL;
while (did_descend) {
did_descend = false;
TSNode child;
// Number of relevant children passed so far at this level.
uint32_t index = 0;
NodeChildIterator iterator = ts_node_iterate_children(&result);
while (ts_node_child_iterator_next(&iterator, &child)) {
if (ts_node__is_relevant(child, false)) {
if (index == named_child_index) {
if (ts_node_is_extra(child)) {
return NULL;
}
// Prefer a name attached directly at this level; otherwise fall back to a
// name recorded while descending.
const char *field_name = ts_node__field_name_from_language(result, iterator.structural_child_index - 1);
if (field_name) return field_name;
return inherited_field_name;
}
index++;
} else {
// A non-relevant child contributes its relevant children to the count.
uint32_t named_grandchild_index = named_child_index - index;
uint32_t grandchild_count = ts_node__relevant_child_count(child, false);
if (named_grandchild_index < grandchild_count) {
// The target lies inside this child: remember its field name, if any,
// and continue the search within it using the rebased index.
const char *field_name = ts_node__field_name_from_language(result, iterator.structural_child_index - 1);
if (field_name) inherited_field_name = field_name;
did_descend = true;
result = child;
named_child_index = named_grandchild_index;
break;
}
index += grandchild_count;
}
}
}
return NULL;
}
// Resolve `name` (of `name_length` bytes) to a field id in the node's
// language, then delegate to ts_node_child_by_field_id().
TSNode ts_node_child_by_field_name(
  TSNode self,
  const char *name,
  uint32_t name_length
) {
  const TSLanguage *language = self.tree->language;
  TSFieldId resolved_id = ts_language_field_id_for_name(language, name, name_length);
  return ts_node_child_by_field_id(self, resolved_id);
}
// Visible child count stored on the subtree, or 0 for a subtree that has no
// children at all.
uint32_t ts_node_child_count(TSNode self) {
  Subtree subtree = ts_node__subtree(self);
  bool has_children = ts_subtree_child_count(subtree) > 0;
  if (!has_children) return 0;
  return subtree.ptr->visible_child_count;
}
// Named child count stored on the subtree, or 0 for a subtree that has no
// children at all.
uint32_t ts_node_named_child_count(TSNode self) {
  Subtree subtree = ts_node__subtree(self);
  bool has_children = ts_subtree_child_count(subtree) > 0;
  if (!has_children) return 0;
  return subtree.ptr->named_child_count;
}
// Next sibling, counting anonymous nodes as well.
TSNode ts_node_next_sibling(TSNode self) {
  const bool include_anonymous = true;
  return ts_node__next_sibling(self, include_anonymous);
}
// Next sibling, with anonymous nodes excluded from the search.
TSNode ts_node_next_named_sibling(TSNode self) {
  const bool include_anonymous = false;
  return ts_node__next_sibling(self, include_anonymous);
}
// Previous sibling, counting anonymous nodes as well.
TSNode ts_node_prev_sibling(TSNode self) {
  const bool include_anonymous = true;
  return ts_node__prev_sibling(self, include_anonymous);
}
// Previous sibling, with anonymous nodes excluded from the search.
TSNode ts_node_prev_named_sibling(TSNode self) {
  const bool include_anonymous = false;
  return ts_node__prev_sibling(self, include_anonymous);
}
// First child ending after `byte`, counting anonymous nodes as well.
TSNode ts_node_first_child_for_byte(TSNode self, uint32_t byte) {
  const bool include_anonymous = true;
  return ts_node__first_child_for_byte(self, byte, include_anonymous);
}
// First child ending after `byte`, with anonymous nodes excluded.
TSNode ts_node_first_named_child_for_byte(TSNode self, uint32_t byte) {
  const bool include_anonymous = false;
  return ts_node__first_child_for_byte(self, byte, include_anonymous);
}
// Descendant spanning the byte range [start, end], counting anonymous nodes
// as well.
TSNode ts_node_descendant_for_byte_range(
  TSNode self,
  uint32_t start,
  uint32_t end
) {
  const bool include_anonymous = true;
  return ts_node__descendant_for_byte_range(self, start, end, include_anonymous);
}
// Descendant spanning the byte range [start, end], with anonymous nodes
// excluded.
TSNode ts_node_named_descendant_for_byte_range(
  TSNode self,
  uint32_t start,
  uint32_t end
) {
  const bool include_anonymous = false;
  return ts_node__descendant_for_byte_range(self, start, end, include_anonymous);
}
// Descendant spanning the point range [start, end], counting anonymous nodes
// as well.
TSNode ts_node_descendant_for_point_range(
  TSNode self,
  TSPoint start,
  TSPoint end
) {
  const bool include_anonymous = true;
  return ts_node__descendant_for_point_range(self, start, end, include_anonymous);
}
// Descendant spanning the point range [start, end], with anonymous nodes
// excluded.
TSNode ts_node_named_descendant_for_point_range(
  TSNode self,
  TSPoint start,
  TSPoint end
) {
  const bool include_anonymous = false;
  return ts_node__descendant_for_point_range(self, start, end, include_anonymous);
}
// Adjust the start position stored in `self->context` so that it accounts
// for `edit`:
//  - a node starting at or after the old end of the edit is shifted by the
//    difference between the new and old end positions;
//  - a node starting strictly inside the edited range is moved to the new end;
//  - a node starting at or before the start of the edit is left where it is.
void ts_node_edit(TSNode *self, const TSInputEdit *edit) {
  uint32_t byte = ts_node_start_byte(*self);
  TSPoint point = ts_node_start_point(*self);

  bool starts_after_edit = byte >= edit->old_end_byte;
  bool starts_inside_edit = !starts_after_edit && byte > edit->start_byte;

  if (starts_after_edit) {
    uint32_t byte_distance = byte - edit->old_end_byte;
    TSPoint point_distance = point_sub(point, edit->old_end_point);
    byte = edit->new_end_byte + byte_distance;
    point = point_add(edit->new_end_point, point_distance);
  } else if (starts_inside_edit) {
    byte = edit->new_end_byte;
    point = edit->new_end_point;
  }

  self->context[0] = byte;
  self->context[1] = point.row;
  self->context[2] = point.column;
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,286 @@
#ifndef TREE_SITTER_PARSER_H_
#define TREE_SITTER_PARSER_H_
#ifdef __cplusplus
extern "C" {
#endif
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
// Built-in symbol values and the size of the serialization buffer.
#define ts_builtin_sym_error ((TSSymbol)-1)
#define ts_builtin_sym_end 0
#define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024
// These typedefs are only declared here when TREE_SITTER_API_H_ has not been
// defined already, which avoids redeclaring them.
#ifndef TREE_SITTER_API_H_
typedef uint16_t TSStateId;
typedef uint16_t TSSymbol;
typedef uint16_t TSFieldId;
typedef struct TSLanguage TSLanguage;
// Three-part version number carried by a language (see TSLanguage.metadata).
typedef struct TSLanguageMetadata {
uint8_t major_version;
uint8_t minor_version;
uint8_t patch_version;
} TSLanguageMetadata;
#endif
// One entry of a production's field map: associates a field id with the
// index of a child, and records whether the field is inherited.
typedef struct {
TSFieldId field_id;
uint8_t child_index;
bool inherited;
} TSFieldMapEntry;
// Used to index the field and supertype maps: a starting index and a length.
typedef struct {
uint16_t index;
uint16_t length;
} TSMapSlice;
// Per-symbol flags (stored in TSLanguage.symbol_metadata).
typedef struct {
bool visible;
bool named;
bool supertype;
} TSSymbolMetadata;
// Lexer interface: state fields plus a table of callbacks. The lexer macros
// in this header read `lookahead`, call `advance(lexer, skip)`, and on
// accepting a token assign `result_symbol` and call `mark_end`.
typedef struct TSLexer TSLexer;
struct TSLexer {
int32_t lookahead;
TSSymbol result_symbol;
void (*advance)(TSLexer *, bool);
void (*mark_end)(TSLexer *);
uint32_t (*get_column)(TSLexer *);
bool (*is_at_included_range_start)(const TSLexer *);
bool (*eof)(const TSLexer *);
void (*log)(const TSLexer *, const char *, ...);
};
// Kinds of parse action; stored in the `type` member of TSParseAction.
typedef enum {
TSParseActionTypeShift,
TSParseActionTypeReduce,
TSParseActionTypeAccept,
TSParseActionTypeRecover,
} TSParseActionType;
// A single parse action. Every variant begins with a `uint8_t type` member,
// so `type` can be read through any of them.
typedef union {
// Payload for a shift action.
struct {
uint8_t type;
TSStateId state;
bool extra;
bool repetition;
} shift;
// Payload for a reduce action.
struct {
uint8_t type;
uint8_t child_count;
TSSymbol symbol;
int16_t dynamic_precedence;
uint16_t production_id;
} reduce;
uint8_t type;
} TSParseAction;
// Lex state pair without a reserved word set id.
typedef struct {
uint16_t lex_state;
uint16_t external_lex_state;
} TSLexMode;
// Lex state pair plus a reserved word set id; this is the element type of
// TSLanguage.lex_modes.
typedef struct {
uint16_t lex_state;
uint16_t external_lex_state;
uint16_t reserved_word_set_id;
} TSLexerMode;
// Element of the parse action table: either an action or an `entry` header
// holding a count and a reusable flag.
typedef union {
TSParseAction action;
struct {
uint8_t count;
bool reusable;
} entry;
} TSParseActionEntry;
// A range of character values; set_contains() treats both `start` and `end`
// as inclusive.
typedef struct {
int32_t start;
int32_t end;
} TSCharacterRange;
// Static description of a language: counts, parse and lex tables, symbol and
// field metadata, lexing callbacks, and the external scanner callbacks.
struct TSLanguage {
uint32_t abi_version;
uint32_t symbol_count;
uint32_t alias_count;
uint32_t token_count;
uint32_t external_token_count;
uint32_t state_count;
uint32_t large_state_count;
uint32_t production_id_count;
uint32_t field_count;
uint16_t max_alias_sequence_length;
const uint16_t *parse_table;
const uint16_t *small_parse_table;
const uint32_t *small_parse_table_map;
const TSParseActionEntry *parse_actions;
const char * const *symbol_names;
// Indexed by field id.
const char * const *field_names;
const TSMapSlice *field_map_slices;
const TSFieldMapEntry *field_map_entries;
const TSSymbolMetadata *symbol_metadata;
const TSSymbol *public_symbol_map;
const uint16_t *alias_map;
const TSSymbol *alias_sequences;
const TSLexerMode *lex_modes;
bool (*lex_fn)(TSLexer *, TSStateId);
bool (*keyword_lex_fn)(TSLexer *, TSStateId);
TSSymbol keyword_capture_token;
// Callbacks and tables for the external scanner.
struct {
const bool *states;
const TSSymbol *symbol_map;
void *(*create)(void);
void (*destroy)(void *);
bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist);
unsigned (*serialize)(void *, char *);
void (*deserialize)(void *, const char *, unsigned);
} external_scanner;
const TSStateId *primary_state_ids;
const char *name;
const TSSymbol *reserved_words;
uint16_t max_reserved_word_set_size;
uint32_t supertype_count;
const TSSymbol *supertype_symbols;
const TSMapSlice *supertype_map_slices;
const TSSymbol *supertype_map_entries;
TSLanguageMetadata metadata;
};
// Binary-search `ranges` (an array of `len` inclusive character ranges,
// assumed to be sorted in ascending order and non-overlapping) for one that
// contains `lookahead`.
//
// Returns true when `lookahead` falls inside one of the ranges. An empty set
// (`len == 0`) contains nothing; it is handled up front so that `ranges[0]`
// is never read in that case.
static inline bool set_contains(const TSCharacterRange *ranges, uint32_t len, int32_t lookahead) {
  if (len == 0) return false;

  uint32_t index = 0;
  uint32_t size = len - index;
  while (size > 1) {
    uint32_t half_size = size / 2;
    uint32_t mid_index = index + half_size;
    const TSCharacterRange *range = &ranges[mid_index];
    if (lookahead >= range->start && lookahead <= range->end) {
      return true;
    } else if (lookahead > range->end) {
      // The match, if any, lies at or after the midpoint.
      index = mid_index;
    }
    size -= half_size;
  }

  // One candidate is left; test it directly.
  const TSCharacterRange *range = &ranges[index];
  return (lookahead >= range->start && lookahead <= range->end);
}
/*
* Lexer Macros
*
* These macros expand to code that refers to variables named `lexer` and
* `state`, and to the labels `start` and `next_state`, so they must be used
* where those names are in scope.
*/
// UNUSED marks the declaration that follows it as intentionally unused, using
// the compiler-specific spelling.
#ifdef _MSC_VER
#define UNUSED __pragma(warning(suppress : 4101))
#else
#define UNUSED __attribute__((unused))
#endif
// Declares the lexer's local variables and the `next_state` / `start` labels.
// Jumping to `next_state` advances the lexer (passing the current `skip`
// flag) before falling through to `start`, which clears `skip` and reloads
// `lookahead` from the lexer.
#define START_LEXER() \
bool result = false; \
bool skip = false; \
UNUSED \
bool eof = false; \
int32_t lookahead; \
goto start; \
next_state: \
lexer->advance(lexer, skip); \
start: \
skip = false; \
lookahead = lexer->lookahead;
// Switch to `state_value` and advance the lexer.
#define ADVANCE(state_value) \
{ \
state = state_value; \
goto next_state; \
}
// Takes a flat list of (character, state) pairs. If `lookahead` equals one of
// the characters, switch to the paired state and advance; otherwise fall
// through.
#define ADVANCE_MAP(...) \
{ \
static const uint16_t map[] = { __VA_ARGS__ }; \
for (uint32_t i = 0; i < sizeof(map) / sizeof(map[0]); i += 2) { \
if (map[i] == lookahead) { \
state = map[i + 1]; \
goto next_state; \
} \
} \
}
// Like ADVANCE, but sets `skip` so that the advance call receives `true`.
#define SKIP(state_value) \
{ \
skip = true; \
state = state_value; \
goto next_state; \
}
// Record `symbol_value` as the lexer's result and mark the token end.
#define ACCEPT_TOKEN(symbol_value) \
result = true; \
lexer->result_symbol = symbol_value; \
lexer->mark_end(lexer);
// Return the current `result` from the enclosing function.
#define END_STATE() return result;
/*
* Parse Table Macros
*
* The action macros below expand to brace-enclosed initializers that use
* designated initializers for the members of TSParseAction.
*/
// Subtracts LARGE_STATE_COUNT from a state id; LARGE_STATE_COUNT is not
// defined in this header and must be provided where the macro is used.
#define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT)
// STATE and ACTIONS expand to their argument unchanged.
#define STATE(id) id
#define ACTIONS(id) id
// Shift action that moves to `state_value`.
#define SHIFT(state_value) \
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.state = (state_value) \
} \
}}
// Shift action with the `repetition` flag set.
#define SHIFT_REPEAT(state_value) \
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.state = (state_value), \
.repetition = true \
} \
}}
// Shift action with the `extra` flag set and no explicit target state.
#define SHIFT_EXTRA() \
{{ \
.shift = { \
.type = TSParseActionTypeShift, \
.extra = true \
} \
}}
// Reduce action for `symbol_name` with the given child count, dynamic
// precedence and production id.
#define REDUCE(symbol_name, children, precedence, prod_id) \
{{ \
.reduce = { \
.type = TSParseActionTypeReduce, \
.symbol = symbol_name, \
.child_count = children, \
.dynamic_precedence = precedence, \
.production_id = prod_id \
}, \
}}
// Recover action; only the `type` member is set.
#define RECOVER() \
{{ \
.type = TSParseActionTypeRecover \
}}
// Accept action; only the `type` member is set.
#define ACCEPT_INPUT() \
{{ \
.type = TSParseActionTypeAccept \
}}
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_PARSER_H_

View File

@ -0,0 +1,48 @@
#ifndef TREE_SITTER_POINT_H_
#define TREE_SITTER_POINT_H_
#include "tree_sitter/api.h"
// The point at row 0, column 0.
#define POINT_ZERO ((TSPoint) {0, 0})
// The point with both row and column set to UINT32_MAX.
#define POINT_MAX ((TSPoint) {UINT32_MAX, UINT32_MAX})
// Build a TSPoint from a row and a column.
static inline TSPoint point__new(unsigned row, unsigned column) {
  TSPoint point = {row, column};
  return point;
}
// Advance `a` by the extent `b`. If `b` spans at least one row, the rows are
// added and the column is taken from `b`; otherwise only the columns are
// added.
static inline TSPoint point_add(TSPoint a, TSPoint b) {
  if (b.row == 0) {
    return point__new(a.row, a.column + b.column);
  }
  return point__new(a.row + b.row, b.column);
}
// Extent from `b` to `a`. If `a` is on a later row, the rows are subtracted
// and the column of `a` is kept. Otherwise the row is 0 and the column
// difference is clamped at 0.
static inline TSPoint point_sub(TSPoint a, TSPoint b) {
  if (a.row > b.row) {
    return point__new(a.row - b.row, a.column);
  }
  unsigned column_delta = 0;
  if (a.column >= b.column) {
    column_delta = a.column - b.column;
  }
  return point__new(0, column_delta);
}
// True when `a` is at or before `b`, ordering by row and then by column.
static inline bool point_lte(TSPoint a, TSPoint b) {
  if (a.row != b.row) return a.row < b.row;
  return a.column <= b.column;
}
// True when `a` is strictly before `b`, ordering by row and then by column.
static inline bool point_lt(TSPoint a, TSPoint b) {
  if (a.row != b.row) return a.row < b.row;
  return a.column < b.column;
}
// True when `a` is strictly after `b`, ordering by row and then by column.
static inline bool point_gt(TSPoint a, TSPoint b) {
  if (a.row != b.row) return a.row > b.row;
  return a.column > b.column;
}
// True when `a` is at or after `b`, ordering by row and then by column.
static inline bool point_gte(TSPoint a, TSPoint b) {
  if (a.row != b.row) return a.row > b.row;
  return a.column >= b.column;
}
// True when both the row and the column of `a` and `b` match.
static inline bool point_eq(TSPoint a, TSPoint b) {
  if (a.row != b.row) return false;
  return a.column == b.column;
}
#endif

View File

@ -0,0 +1,239 @@
// "License": Public Domain
// I, Mathias Panzenböck, place this file hereby into the public domain. Use it at your own risk for whatever you like.
// In case there are jurisdictions that don't support putting things in the public domain you can also consider it to
// be "dual licensed" under the BSD, MIT and Apache licenses, if you want to. This code is trivial anyway. Consider it
// an example on how to get the endian conversion functions on different platforms.
// updates from https://github.com/mikepb/endian.h/issues/4
#ifndef ENDIAN_H
#define ENDIAN_H
/* Normalize the various Windows detection macros into __WINDOWS__. */
#if (defined(_WIN16) || defined(_WIN32) || defined(_WIN64)) && !defined(__WINDOWS__)
# define __WINDOWS__
#endif
/* Platforms that provide <endian.h>: include it directly. */
#if defined(HAVE_ENDIAN_H) || \
defined(__linux__) || \
defined(__GNU__) || \
defined(__illumos__) || \
defined(__NetBSD__) || \
defined(__OpenBSD__) || \
defined(__CYGWIN__) || \
defined(__MSYS__) || \
defined(__EMSCRIPTEN__) || \
defined(__wasi__)
#if defined(__NetBSD__)
#define _NETBSD_SOURCE 1
#endif
# include <endian.h>
/* Platforms that provide <sys/endian.h> instead. */
#elif defined(HAVE_SYS_ENDIAN_H) || \
defined(__FreeBSD__) || \
defined(__DragonFly__)
# include <sys/endian.h>
/* Apple: map the double-underscore byte-order names onto the plain ones and
   define the conversion macros by hand. */
#elif defined(__APPLE__)
# define __BYTE_ORDER BYTE_ORDER
# define __BIG_ENDIAN BIG_ENDIAN
# define __LITTLE_ENDIAN LITTLE_ENDIAN
# define __PDP_ENDIAN PDP_ENDIAN
/* Without _POSIX_C_SOURCE, use the OSByteOrder helpers. */
# if !defined(_POSIX_C_SOURCE)
# include <libkern/OSByteOrder.h>
# define htobe16(x) OSSwapHostToBigInt16(x)
# define htole16(x) OSSwapHostToLittleInt16(x)
# define be16toh(x) OSSwapBigToHostInt16(x)
# define le16toh(x) OSSwapLittleToHostInt16(x)
# define htobe32(x) OSSwapHostToBigInt32(x)
# define htole32(x) OSSwapHostToLittleInt32(x)
# define be32toh(x) OSSwapBigToHostInt32(x)
# define le32toh(x) OSSwapLittleToHostInt32(x)
# define htobe64(x) OSSwapHostToBigInt64(x)
# define htole64(x) OSSwapHostToLittleInt64(x)
# define be64toh(x) OSSwapBigToHostInt64(x)
# define le64toh(x) OSSwapLittleToHostInt64(x)
/* With _POSIX_C_SOURCE, fall back to compiler byte-swap builtins chosen by
   BYTE_ORDER. */
# else
# if BYTE_ORDER == LITTLE_ENDIAN
# define htobe16(x) __builtin_bswap16(x)
# define htole16(x) (x)
# define be16toh(x) __builtin_bswap16(x)
# define le16toh(x) (x)
# define htobe32(x) __builtin_bswap32(x)
# define htole32(x) (x)
# define be32toh(x) __builtin_bswap32(x)
# define le32toh(x) (x)
# define htobe64(x) __builtin_bswap64(x)
# define htole64(x) (x)
# define be64toh(x) __builtin_bswap64(x)
# define le64toh(x) (x)
# elif BYTE_ORDER == BIG_ENDIAN
# define htobe16(x) (x)
# define htole16(x) __builtin_bswap16(x)
# define be16toh(x) (x)
# define le16toh(x) __builtin_bswap16(x)
# define htobe32(x) (x)
# define htole32(x) __builtin_bswap32(x)
# define be32toh(x) (x)
# define le32toh(x) __builtin_bswap32(x)
# define htobe64(x) (x)
# define htole64(x) __builtin_bswap64(x)
# define be64toh(x) (x)
# define le64toh(x) __builtin_bswap64(x)
# else
# error byte order not supported
# endif
# endif
/* Windows: choose byte-swap primitives by compiler, make sure BIG_ENDIAN,
   LITTLE_ENDIAN and BYTE_ORDER exist, then define the conversion macros. */
#elif defined(__WINDOWS__)
# if defined(_MSC_VER) && !defined(__clang__)
# include <stdlib.h>
# define B_SWAP_16(x) _byteswap_ushort(x)
# define B_SWAP_32(x) _byteswap_ulong(x)
# define B_SWAP_64(x) _byteswap_uint64(x)
# else
# define B_SWAP_16(x) __builtin_bswap16(x)
# define B_SWAP_32(x) __builtin_bswap32(x)
# define B_SWAP_64(x) __builtin_bswap64(x)
# endif
# if defined(__MINGW32__) || defined(HAVE_SYS_PARAM_H)
# include <sys/param.h>
# endif
/* Supply BIG_ENDIAN from whichever spelling is available, else 4321. */
# ifndef BIG_ENDIAN
# ifdef __BIG_ENDIAN
# define BIG_ENDIAN __BIG_ENDIAN
# elif defined(__ORDER_BIG_ENDIAN__)
# define BIG_ENDIAN __ORDER_BIG_ENDIAN__
# else
# define BIG_ENDIAN 4321
# endif
# endif
/* Supply LITTLE_ENDIAN from whichever spelling is available, else 1234. */
# ifndef LITTLE_ENDIAN
# ifdef __LITTLE_ENDIAN
# define LITTLE_ENDIAN __LITTLE_ENDIAN
# elif defined(__ORDER_LITTLE_ENDIAN__)
# define LITTLE_ENDIAN __ORDER_LITTLE_ENDIAN__
# else
# define LITTLE_ENDIAN 1234
# endif
# endif
# ifndef BYTE_ORDER
# ifdef __BYTE_ORDER
# define BYTE_ORDER __BYTE_ORDER
# elif defined(__BYTE_ORDER__)
# define BYTE_ORDER __BYTE_ORDER__
# else
/* assume LE on Windows if nothing was defined */
# define BYTE_ORDER LITTLE_ENDIAN
# endif
# endif
# if BYTE_ORDER == LITTLE_ENDIAN
# define htobe16(x) B_SWAP_16(x)
# define htole16(x) (x)
# define be16toh(x) B_SWAP_16(x)
# define le16toh(x) (x)
# define htobe32(x) B_SWAP_32(x)
# define htole32(x) (x)
# define be32toh(x) B_SWAP_32(x)
# define le32toh(x) (x)
# define htobe64(x) B_SWAP_64(x)
# define htole64(x) (x)
# define be64toh(x) B_SWAP_64(x)
# define le64toh(x) (x)
# elif BYTE_ORDER == BIG_ENDIAN
# define htobe16(x) (x)
# define htole16(x) B_SWAP_16(x)
# define be16toh(x) (x)
# define le16toh(x) B_SWAP_16(x)
# define htobe32(x) (x)
# define htole32(x) B_SWAP_32(x)
# define be32toh(x) (x)
# define le32toh(x) B_SWAP_32(x)
# define htobe64(x) (x)
# define htole64(x) B_SWAP_64(x)
# define be64toh(x) (x)
# define le64toh(x) B_SWAP_64(x)
# else
# error byte order not supported
# endif
/* QNX Neutrino: use the ENDIAN_* macros (NOTE(review): presumably supplied
   by <gulliver.h> -- confirm). */
#elif defined(__QNXNTO__)
# include <gulliver.h>
# define __LITTLE_ENDIAN 1234
# define __BIG_ENDIAN 4321
# define __PDP_ENDIAN 3412
# if defined(__BIGENDIAN__)
# define __BYTE_ORDER __BIG_ENDIAN
# define htobe16(x) (x)
# define htobe32(x) (x)
# define htobe64(x) (x)
# define htole16(x) ENDIAN_SWAP16(x)
# define htole32(x) ENDIAN_SWAP32(x)
# define htole64(x) ENDIAN_SWAP64(x)
# elif defined(__LITTLEENDIAN__)
# define __BYTE_ORDER __LITTLE_ENDIAN
# define htole16(x) (x)
# define htole32(x) (x)
# define htole64(x) (x)
# define htobe16(x) ENDIAN_SWAP16(x)
# define htobe32(x) ENDIAN_SWAP32(x)
# define htobe64(x) ENDIAN_SWAP64(x)
# else
# error byte order not supported
# endif
/* The to-host conversions are defined the same way for either byte order. */
# define be16toh(x) ENDIAN_BE16(x)
# define be32toh(x) ENDIAN_BE32(x)
# define be64toh(x) ENDIAN_BE64(x)
# define le16toh(x) ENDIAN_LE16(x)
# define le32toh(x) ENDIAN_LE32(x)
# define le64toh(x) ENDIAN_LE64(x)
#else
# error platform not supported
#endif
#endif

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,34 @@
#ifndef TREE_SITTER_REDUCE_ACTION_H_
#define TREE_SITTER_REDUCE_ACTION_H_
#ifdef __cplusplus
extern "C" {
#endif
#include "./array.h"
#include "tree_sitter/api.h"
// Description of one reduce action. ts_reduce_action_set_add() treats two
// actions as duplicates when both `symbol` and `count` match.
typedef struct {
uint32_t count;
TSSymbol symbol;
int dynamic_precedence;
unsigned short production_id;
} ReduceAction;
// Growable array of ReduceAction values.
typedef Array(ReduceAction) ReduceActionSet;
// Append `new_action` to the set unless an action with the same symbol and
// the same count is already present.
static inline void ts_reduce_action_set_add(ReduceActionSet *self,
                                            ReduceAction new_action) {
  uint32_t position = 0;
  while (position < self->size) {
    ReduceAction existing = self->contents[position];
    bool is_duplicate =
      existing.symbol == new_action.symbol && existing.count == new_action.count;
    if (is_duplicate) return;
    position++;
  }
  array_push(self, new_action);
}
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_REDUCE_ACTION_H_

View File

@ -0,0 +1,95 @@
#include "./subtree.h"
// One level of the traversal: a subtree, its index among its parent's
// children, and the byte offset at which it starts.
typedef struct {
Subtree tree;
uint32_t child_index;
uint32_t byte_offset;
} StackEntry;
// Cursor over a subtree hierarchy. `stack` holds the path from the starting
// tree down to the current subtree (its last entry), and
// `last_external_token` is updated by reusable_node_advance().
typedef struct {
Array(StackEntry) stack;
Subtree last_external_token;
} ReusableNode;
static inline ReusableNode reusable_node_new(void) {
return (ReusableNode) {array_new(), NULL_SUBTREE};
}
// Reset the cursor: forget the last external token and empty the stack.
static inline void reusable_node_clear(ReusableNode *self) {
  self->last_external_token = NULL_SUBTREE;
  array_clear(&self->stack);
}
// Subtree at the top of the stack, or NULL_SUBTREE when the stack is empty.
static inline Subtree reusable_node_tree(ReusableNode *self) {
  if (self->stack.size > 0) {
    return self->stack.contents[self->stack.size - 1].tree;
  }
  return NULL_SUBTREE;
}
// Byte offset of the entry at the top of the stack, or UINT32_MAX when the
// stack is empty.
static inline uint32_t reusable_node_byte_offset(ReusableNode *self) {
  if (self->stack.size > 0) {
    return self->stack.contents[self->stack.size - 1].byte_offset;
  }
  return UINT32_MAX;
}
// Release the storage held by the cursor's stack.
static inline void reusable_node_delete(ReusableNode *self) {
  array_delete(&(self->stack));
}
// Move the cursor to the subtree that follows the current one: the next
// sibling if there is one, otherwise the next sibling of the nearest ancestor
// that has one. If no such subtree exists the stack ends up empty.
static inline void reusable_node_advance(ReusableNode *self) {
  StackEntry current = *array_back(&self->stack);

  // The following subtree starts where the current one ends.
  uint32_t following_offset = current.byte_offset + ts_subtree_total_bytes(current.tree);

  if (ts_subtree_has_external_tokens(current.tree)) {
    self->last_external_token = ts_subtree_last_external_token(current.tree);
  }

  // Pop entries until a parent with a further child is found.
  Subtree parent;
  uint32_t sibling_index;
  for (;;) {
    StackEntry popped = array_pop(&self->stack);
    sibling_index = popped.child_index + 1;
    if (self->stack.size == 0) return;
    parent = array_back(&self->stack)->tree;
    if (ts_subtree_child_count(parent) > sibling_index) break;
  }

  array_push(&self->stack, ((StackEntry) {
    .tree = ts_subtree_children(parent)[sibling_index],
    .child_index = sibling_index,
    .byte_offset = following_offset,
  }));
}
// Step into the first child of the current subtree. Returns false, leaving
// the stack untouched, when the current subtree has no children.
static inline bool reusable_node_descend(ReusableNode *self) {
  StackEntry current = *array_back(&self->stack);
  if (!(ts_subtree_child_count(current.tree) > 0)) return false;

  // The first child starts at the same byte offset as its parent.
  array_push(&self->stack, ((StackEntry) {
    .tree = ts_subtree_children(current.tree)[0],
    .child_index = 0,
    .byte_offset = current.byte_offset,
  }));
  return true;
}
// Descend through first children until a childless subtree is reached,
// then advance past it.
static inline void reusable_node_advance_past_leaf(ReusableNode *self) {
  bool descended;
  do {
    descended = reusable_node_descend(self);
  } while (descended);
  reusable_node_advance(self);
}
// Point the cursor at the first child of `tree`, or leave it empty when
// `tree` has no children.
static inline void reusable_node_reset(ReusableNode *self, Subtree tree) {
  reusable_node_clear(self);

  StackEntry root_entry = {
    .tree = tree,
    .child_index = 0,
    .byte_offset = 0,
  };
  array_push(&self->stack, root_entry);

  // Never reuse the root node, because it has a non-standard internal structure
  // due to transformations that are applied when it is accepted: adding the EOF
  // child and any extra children.
  bool has_children = reusable_node_descend(self);
  if (!has_children) {
    reusable_node_clear(self);
  }
}

View File

@ -0,0 +1,912 @@
#include "./alloc.h"
#include "./language.h"
#include "./subtree.h"
#include "./array.h"
#include "./stack.h"
#include "./length.h"
#include <assert.h>
#include <inttypes.h>
#include <stdio.h>
// Maximum number of links a single StackNode can hold; stack_node_add_link
// silently drops a link once a node is full.
#define MAX_LINK_COUNT 8
// Maximum number of released StackNodes kept for reuse; beyond this,
// stack_node_release frees nodes instead of pooling them.
#define MAX_NODE_POOL_SIZE 50
// Upper bound on simultaneously live iterators in stack__iter; additional
// links are not followed once this many iterators exist.
#define MAX_ITERATOR_COUNT 64
// `forceinline` marks the small iteration callbacks below. With a non-GNU
// compiler on Windows it expands to `__forceinline`; otherwise to
// `static inline` with the `always_inline` attribute.
#if defined _WIN32 && !defined __GNUC__
#define forceinline __forceinline
#else
#define forceinline static inline __attribute__((always_inline))
#endif
typedef struct StackNode StackNode;
// A link from a stack node to one of its predecessor nodes.
typedef struct {
// The predecessor node this link points to.
StackNode *node;
// The subtree carried by this link; its `ptr` may be NULL.
Subtree subtree;
// The `pending` flag that was passed to ts_stack_push for this link.
bool is_pending;
} StackLink;
// A reference-counted node of the stack graph. The aggregate fields are
// derived from a predecessor plus the subtree on the connecting link (see
// stack_node_new and stack_node_add_link).
struct StackNode {
// Parse state stored at this node.
TSStateId state;
// Predecessor's position plus the total size of the linked subtree.
Length position;
// Links to predecessor nodes; only the first `link_count` are in use.
StackLink links[MAX_LINK_COUNT];
short unsigned int link_count;
// Number of references (heads and links) keeping this node alive.
uint32_t ref_count;
// Predecessor's error cost plus the linked subtree's error cost.
unsigned error_cost;
// Largest node count over the links, as computed by stack__subtree_node_count.
unsigned node_count;
// Largest accumulated dynamic precedence over the links.
int dynamic_precedence;
};
// State of one path being followed by stack__iter.
typedef struct {
// The node the iterator is currently at.
StackNode *node;
// Subtrees collected from the links crossed so far (only filled when
// stack__iter is asked to include subtrees).
SubtreeArray subtrees;
// Number of links crossed whose subtree is absent or not `extra`.
uint32_t subtree_count;
// Starts true; cleared once a link is crossed that has no subtree, or whose
// subtree is not `extra` and is not marked pending.
bool is_pending;
} StackIterator;
// Array of StackNode pointers; the type of the pool of recycled nodes.
typedef Array(StackNode *) StackNodeArray;
// Lifecycle status of a stack head (version).
typedef enum {
// Default status of new heads; required by ts_stack_can_merge.
StackStatusActive,
// Set by ts_stack_pause, which also stores a lookahead; ended by ts_stack_resume.
StackStatusPaused,
// Set by ts_stack_halt.
StackStatusHalted,
} StackStatus;
// One version of the stack: a reference to its top node plus per-version data.
typedef struct {
// Top node of this version; the head holds a reference to it.
StackNode *node;
// Summary stored by ts_stack_record_summary, or NULL; owned by the head.
StackSummary *summary;
// Value of the node's `node_count` recorded at the last error-related event
// (pause, push of a link without a subtree, merge in the error state).
unsigned node_count_at_last_error;
// Last external token for this version (retained when non-null).
Subtree last_external_token;
// Lookahead stored by ts_stack_pause and handed back by ts_stack_resume.
Subtree lookahead_when_paused;
StackStatus status;
} StackHead;
// The stack itself: a set of heads (versions) over a shared graph of nodes.
struct Stack {
// One entry per stack version.
Array(StackHead) heads;
// Result buffer of stack__iter; cleared at the start of every iteration.
StackSliceArray slices;
// Scratch array of in-flight iterators used by stack__iter and the DOT printer.
Array(StackIterator) iterators;
// Released nodes kept for reuse, at most MAX_NODE_POOL_SIZE of them.
StackNodeArray node_pool;
// Node that every cleared stack starts from (see ts_stack_clear).
StackNode *base_node;
// Pool passed to ts_subtree_release when links and heads drop subtrees.
SubtreePool *subtree_pool;
};
// Bit flags returned by a StackCallback; stack__iter tests them with `&`, so
// StackActionStop and StackActionPop may be OR-ed together.
typedef unsigned StackAction;
enum {
// Keep walking and record nothing.
StackActionNone,
// Stop following this iterator.
StackActionStop = 1,
// Record the path walked so far as a slice.
StackActionPop = 2,
};
// Callback invoked by stack__iter for each iterator before it advances. The
// first argument is the caller-supplied payload.
typedef StackAction (*StackCallback)(void *, const StackIterator *);
// Add one reference to `self`. A NULL node is ignored. Asserts that the node
// is still alive and that the counter does not wrap around to zero.
static void stack_node_retain(StackNode *self) {
  if (self) {
    ts_assert(self->ref_count > 0);
    self->ref_count++;
    ts_assert(self->ref_count != 0);
  }
}
// Drop one reference to `self`. When the count reaches zero, the node's link
// subtrees and predecessor nodes are released and the node is either placed
// in `pool` (while the pool has room) or freed. Predecessors on links 1..n
// are released recursively; the predecessor on link 0 is handled by looping,
// so a long single-link chain does not grow the call stack.
static void stack_node_release(
  StackNode *self,
  StackNodeArray *pool,
  SubtreePool *subtree_pool
) {
  for (;;) {
    ts_assert(self->ref_count != 0);
    self->ref_count--;
    if (self->ref_count > 0) return;

    StackNode *first_predecessor = NULL;
    if (self->link_count > 0) {
      for (unsigned i = self->link_count - 1; i > 0; i--) {
        StackLink extra_link = self->links[i];
        if (extra_link.subtree.ptr) {
          ts_subtree_release(subtree_pool, extra_link.subtree);
        }
        stack_node_release(extra_link.node, pool, subtree_pool);
      }
      StackLink first_link = self->links[0];
      if (first_link.subtree.ptr) {
        ts_subtree_release(subtree_pool, first_link.subtree);
      }
      first_predecessor = first_link.node;
    }

    // Recycle the node when the pool has room, otherwise free it.
    if (pool->size < MAX_NODE_POOL_SIZE) {
      array_push(pool, self);
    } else {
      ts_free(self);
    }

    if (!first_predecessor) return;
    self = first_predecessor;
  }
}
/// Count the nodes a subtree contributes when measuring how much progress
/// a given version of the stack has made.
static uint32_t stack__subtree_node_count(Subtree subtree) {
  uint32_t total = ts_subtree_visible_descendant_count(subtree);

  // The subtree itself counts when it is visible.
  total += ts_subtree_visible(subtree) ? 1 : 0;

  // Intermediate error nodes are counted even though they are invisible,
  // because a version's node count is used to decide whether it has made
  // any progress since it last encountered an error.
  total += (ts_subtree_symbol(subtree) == ts_builtin_sym_error_repeat) ? 1 : 0;

  return total;
}
// Create a node in `state` with a reference count of one, taking storage from
// `pool` when it has a spare node and from ts_malloc otherwise. When
// `previous_node` is given, the new node gets a single link to it carrying
// `subtree`, and inherits its position, error cost, node count and dynamic
// precedence, each increased by the subtree's contribution if the subtree is
// non-null. The link does not add a reference to `previous_node` or `subtree`.
static StackNode *stack_node_new(
  StackNode *previous_node,
  Subtree subtree,
  bool is_pending,
  TSStateId state,
  StackNodeArray *pool
) {
  StackNode *node;
  if (pool->size > 0) {
    node = array_pop(pool);
  } else {
    node = ts_malloc(sizeof(StackNode));
  }
  *node = (StackNode) {
    .ref_count = 1,
    .link_count = 0,
    .state = state
  };

  if (!previous_node) {
    node->position = length_zero();
    node->error_cost = 0;
    return node;
  }

  node->links[0] = (StackLink) {
    .node = previous_node,
    .subtree = subtree,
    .is_pending = is_pending,
  };
  node->link_count = 1;

  // Start from the predecessor's aggregates...
  node->position = previous_node->position;
  node->error_cost = previous_node->error_cost;
  node->dynamic_precedence = previous_node->dynamic_precedence;
  node->node_count = previous_node->node_count;

  // ...and add what the linked subtree contributes.
  if (subtree.ptr) {
    node->error_cost += ts_subtree_error_cost(subtree);
    node->position = length_add(node->position, ts_subtree_total_size(subtree));
    node->node_count += stack__subtree_node_count(subtree);
    node->dynamic_precedence += ts_subtree_dynamic_precedence(subtree);
  }

  return node;
}
// Decide whether two link subtrees are interchangeable for the purpose of
// merging links.
static bool stack__subtree_is_equivalent(Subtree left, Subtree right) {
  // Identical pointers (including two null subtrees) are equivalent; a null
  // subtree never matches a non-null one.
  if (left.ptr == right.ptr) return true;
  if (!left.ptr || !right.ptr) return false;

  // Symbols must match
  if (ts_subtree_symbol(left) != ts_subtree_symbol(right)) return false;

  // If both have errors, don't bother keeping both.
  if (ts_subtree_error_cost(left) > 0 && ts_subtree_error_cost(right) > 0) return true;

  // Otherwise the byte extents, child count, `extra` flag and external
  // scanner state all have to agree.
  if (ts_subtree_padding(left).bytes != ts_subtree_padding(right).bytes) return false;
  if (ts_subtree_size(left).bytes != ts_subtree_size(right).bytes) return false;
  if (ts_subtree_child_count(left) != ts_subtree_child_count(right)) return false;
  if (ts_subtree_extra(left) != ts_subtree_extra(right)) return false;
  return ts_subtree_external_scanner_state_eq(left, right);
}
// Add `link` as another predecessor link of `self`.
//
// If an existing link carries an equivalent subtree, no new link is added:
// either the two links join the same pair of nodes (the subtree with the
// higher dynamic precedence is kept), or their target nodes are mergeable
// and the new target's own links are folded into the existing target. In all
// other cases the link is appended, retaining its node and subtree, unless
// `self` already holds MAX_LINK_COUNT links, in which case it is dropped.
static void stack_node_add_link(
StackNode *self,
StackLink link,
SubtreePool *subtree_pool
) {
// A node is never linked to itself.
if (link.node == self) return;
for (int i = 0; i < self->link_count; i++) {
StackLink *existing_link = &self->links[i];
if (stack__subtree_is_equivalent(existing_link->subtree, link.subtree)) {
// In general, we preserve ambiguities until they are removed from the stack
// during a pop operation where multiple paths lead to the same node. But in
// the special case where two links directly connect the same pair of nodes,
// we can safely remove the ambiguity ahead of time without changing behavior.
if (existing_link->node == link.node) {
if (
ts_subtree_dynamic_precedence(link.subtree) >
ts_subtree_dynamic_precedence(existing_link->subtree)
) {
// Swap in the higher-precedence subtree: retain the new one before
// releasing the one it replaces.
ts_subtree_retain(link.subtree);
ts_subtree_release(subtree_pool, existing_link->subtree);
existing_link->subtree = link.subtree;
self->dynamic_precedence =
link.node->dynamic_precedence + ts_subtree_dynamic_precedence(link.subtree);
}
return;
}
// If the previous nodes are mergeable, merge them recursively.
if (
existing_link->node->state == link.node->state &&
existing_link->node->position.bytes == link.node->position.bytes &&
existing_link->node->error_cost == link.node->error_cost
) {
for (int j = 0; j < link.node->link_count; j++) {
stack_node_add_link(existing_link->node, link.node->links[j], subtree_pool);
}
// Keep the highest dynamic precedence reachable through either path.
int32_t dynamic_precedence = link.node->dynamic_precedence;
if (link.subtree.ptr) {
dynamic_precedence += ts_subtree_dynamic_precedence(link.subtree);
}
if (dynamic_precedence > self->dynamic_precedence) {
self->dynamic_precedence = dynamic_precedence;
}
return;
}
}
}
// No existing link absorbed the new one. Drop it if the node is full.
if (self->link_count == MAX_LINK_COUNT) return;
// The stored link holds its own references to the target node and subtree.
stack_node_retain(link.node);
unsigned node_count = link.node->node_count;
int dynamic_precedence = link.node->dynamic_precedence;
self->links[self->link_count++] = link;
if (link.subtree.ptr) {
ts_subtree_retain(link.subtree);
node_count += stack__subtree_node_count(link.subtree);
dynamic_precedence += ts_subtree_dynamic_precedence(link.subtree);
}
// The node's aggregates are the maxima over all of its links.
if (node_count > self->node_count) self->node_count = node_count;
if (dynamic_precedence > self->dynamic_precedence) self->dynamic_precedence = dynamic_precedence;
}
// Release everything a head owns: its external token, its stored lookahead,
// its summary, and its reference to the top node. A head whose node is NULL
// is left untouched.
static void stack_head_delete(
  StackHead *self,
  StackNodeArray *pool,
  SubtreePool *subtree_pool
) {
  if (!self->node) return;

  if (self->last_external_token.ptr) {
    ts_subtree_release(subtree_pool, self->last_external_token);
  }
  if (self->lookahead_when_paused.ptr) {
    ts_subtree_release(subtree_pool, self->lookahead_when_paused);
  }
  if (self->summary) {
    array_delete(self->summary);
    ts_free(self->summary);
  }
  stack_node_release(self->node, pool, subtree_pool);
}
// Append a new active head pointing at `node`, copying the error bookkeeping
// and last external token from `original_version`. The node and the token
// (when non-null) are retained. Returns the index of the new head.
static StackVersion ts_stack__add_version(
  Stack *self,
  StackVersion original_version,
  StackNode *node
) {
  // Read everything needed from the original head before pushing, since
  // growing the array may move its contents.
  StackHead *original = array_get(&self->heads, original_version);
  StackHead head = {
    .node = node,
    .node_count_at_last_error = original->node_count_at_last_error,
    .last_external_token = original->last_external_token,
    .status = StackStatusActive,
    .lookahead_when_paused = NULL_SUBTREE,
  };

  array_push(&self->heads, head);
  stack_node_retain(node);
  if (head.last_external_token.ptr) {
    ts_subtree_retain(head.last_external_token);
  }

  return (StackVersion)(self->heads.size - 1);
}
// Record `subtrees` as a slice ending at `node`. If an existing slice's
// version already has `node` as its head, the new slice reuses that version
// and is inserted right after the last such slice; otherwise a new version is
// created for it and the slice is appended.
static void ts_stack__add_slice(
  Stack *self,
  StackVersion original_version,
  StackNode *node,
  SubtreeArray *subtrees
) {
  // Scan existing slices from last to first.
  for (uint32_t i = self->slices.size; i > 0; i--) {
    StackVersion existing_version = array_get(&self->slices, i - 1)->version;
    if (array_get(&self->heads, existing_version)->node == node) {
      StackSlice slice = {*subtrees, existing_version};
      array_insert(&self->slices, i, slice);
      return;
    }
  }

  StackVersion new_version = ts_stack__add_version(self, original_version, node);
  StackSlice slice = {*subtrees, new_version};
  array_push(&self->slices, slice);
}
// Walk backwards from the head of `version` along every link path, calling
// `callback(payload, iterator)` for each iterator before it advances.
//
// The flags returned by the callback decide what happens: StackActionPop
// records the path walked so far as a slice (see ts_stack__add_slice), and
// StackActionStop ends that iterator. An iterator also ends when its node has
// no links. When `goal_subtree_count` is non-negative, the subtrees on the
// crossed links are collected and retained; a negative value disables
// collection.
//
// Returns `self->slices`, which is cleared at the start of every call.
static StackSliceArray stack__iter(
Stack *self,
StackVersion version,
StackCallback callback,
void *payload,
int goal_subtree_count
) {
array_clear(&self->slices);
array_clear(&self->iterators);
StackHead *head = array_get(&self->heads, version);
StackIterator new_iterator = {
.node = head->node,
.subtrees = array_new(),
.subtree_count = 0,
.is_pending = true,
};
bool include_subtrees = false;
if (goal_subtree_count >= 0) {
include_subtrees = true;
array_reserve(&new_iterator.subtrees, (uint32_t)ts_subtree_alloc_size(goal_subtree_count) / sizeof(Subtree));
}
array_push(&self->iterators, new_iterator);
while (self->iterators.size > 0) {
// `size` is captured up front so iterators forked during this pass are
// first visited on the next pass.
for (uint32_t i = 0, size = self->iterators.size; i < size; i++) {
StackIterator *iterator = array_get(&self->iterators, i);
StackNode *node = iterator->node;
StackAction action = callback(payload, iterator);
bool should_pop = action & StackActionPop;
bool should_stop = action & StackActionStop || node->link_count == 0;
if (should_pop) {
SubtreeArray subtrees = iterator->subtrees;
// If the iterator keeps going it still needs its own array, so the slice
// gets a copy; otherwise the slice takes over the iterator's array.
if (!should_stop) {
ts_subtree_array_copy(subtrees, &subtrees);
}
ts_subtree_array_reverse(&subtrees);
ts_stack__add_slice(
self,
version,
node,
&subtrees
);
}
if (should_stop) {
// Subtrees that were not handed to a slice are released here.
if (!should_pop) {
ts_subtree_array_delete(self->subtree_pool, &iterator->subtrees);
}
array_erase(&self->iterators, i);
i--, size--;
continue;
}
// Links 1..n-1 each fork a copy of the current iterator; link 0 is followed
// last (when j == link_count) by the current iterator itself, so the copies
// are taken before it is modified.
for (uint32_t j = 1; j <= node->link_count; j++) {
StackIterator *next_iterator;
StackLink link;
if (j == node->link_count) {
link = node->links[0];
next_iterator = array_get(&self->iterators, i);
} else {
// Too many iterators already: this link is not followed.
if (self->iterators.size >= MAX_ITERATOR_COUNT) continue;
link = node->links[j];
StackIterator current_iterator = *array_get(&self->iterators, i);
array_push(&self->iterators, current_iterator);
next_iterator = array_back(&self->iterators);
ts_subtree_array_copy(next_iterator->subtrees, &next_iterator->subtrees);
}
next_iterator->node = link.node;
if (link.subtree.ptr) {
if (include_subtrees) {
array_push(&next_iterator->subtrees, link.subtree);
ts_subtree_retain(link.subtree);
}
// `extra` subtrees are collected but do not count toward subtree_count.
if (!ts_subtree_extra(link.subtree)) {
next_iterator->subtree_count++;
if (!link.is_pending) {
next_iterator->is_pending = false;
}
}
} else {
// A link without a subtree still counts and ends the pending run.
next_iterator->subtree_count++;
next_iterator->is_pending = false;
}
}
}
}
return self->slices;
}
// Allocate a stack that releases subtrees into `subtree_pool`. The returned
// stack has a single active version positioned at the base node, which is
// created with state 1.
Stack *ts_stack_new(SubtreePool *subtree_pool) {
  Stack *stack = ts_calloc(1, sizeof(Stack));

  array_init(&stack->heads);
  array_reserve(&stack->heads, 4);

  array_init(&stack->slices);
  array_reserve(&stack->slices, 4);

  array_init(&stack->iterators);
  array_reserve(&stack->iterators, 4);

  array_init(&stack->node_pool);
  array_reserve(&stack->node_pool, MAX_NODE_POOL_SIZE);

  stack->subtree_pool = subtree_pool;
  stack->base_node = stack_node_new(NULL, NULL_SUBTREE, false, 1, &stack->node_pool);

  // Installs the initial head that points at the base node.
  ts_stack_clear(stack);

  return stack;
}
// Destroy the stack: release the scratch arrays, the base node, every head,
// all pooled nodes, and finally the stack structure itself.
void ts_stack_delete(Stack *self) {
  if (self->slices.contents) {
    array_delete(&self->slices);
  }
  if (self->iterators.contents) {
    array_delete(&self->iterators);
  }

  stack_node_release(self->base_node, &self->node_pool, self->subtree_pool);

  for (uint32_t v = 0, head_count = self->heads.size; v < head_count; v++) {
    StackHead *head = array_get(&self->heads, v);
    stack_head_delete(head, &self->node_pool, self->subtree_pool);
  }
  array_clear(&self->heads);

  // Releasing heads may have pushed nodes into the pool, so the pool is
  // emptied only after every head is gone.
  if (self->node_pool.contents) {
    for (uint32_t n = 0; n < self->node_pool.size; n++) {
      ts_free(*array_get(&self->node_pool, n));
    }
    array_delete(&self->node_pool);
  }

  array_delete(&self->heads);
  ts_free(self);
}
// Return the number of heads (versions), whatever their status.
uint32_t ts_stack_version_count(const Stack *self) {
  uint32_t version_count = self->heads.size;
  return version_count;
}
// Return how many heads currently have the halted status.
uint32_t ts_stack_halted_version_count(Stack *self) {
  uint32_t halted = 0;
  for (uint32_t v = 0; v < self->heads.size; v++) {
    if (array_get(&self->heads, v)->status == StackStatusHalted) halted++;
  }
  return halted;
}
// Return the parse state stored on the top node of `version`.
TSStateId ts_stack_state(const Stack *self, StackVersion version) {
  const StackHead *head = array_get(&self->heads, version);
  return head->node->state;
}
// Return the position stored on the top node of `version`.
Length ts_stack_position(const Stack *self, StackVersion version) {
  const StackHead *head = array_get(&self->heads, version);
  return head->node->position;
}
// Return the last external token recorded for `version` without retaining
// it; the result may be a null subtree.
Subtree ts_stack_last_external_token(const Stack *self, StackVersion version) {
  const StackHead *head = array_get(&self->heads, version);
  return head->last_external_token;
}
// Replace the last external token of `version` with `token`. The new token
// is retained before the old one is released, so passing the token that is
// already stored is safe.
void ts_stack_set_last_external_token(Stack *self, StackVersion version, Subtree token) {
  StackHead *head = array_get(&self->heads, version);
  Subtree previous = head->last_external_token;

  if (token.ptr) {
    ts_subtree_retain(token);
  }
  if (previous.ptr) {
    ts_subtree_release(self->subtree_pool, previous);
  }
  head->last_external_token = token;
}
// Return the error cost of `version`: the cost accumulated on its top node,
// plus ERROR_COST_PER_RECOVERY when the version is paused or when its top
// node is in the error state and its first link carries no subtree.
unsigned ts_stack_error_cost(const Stack *self, StackVersion version) {
  const StackHead *head = array_get(&self->heads, version);
  const StackNode *node = head->node;

  bool recovering = head->status == StackStatusPaused;
  if (!recovering) {
    recovering = node->state == ERROR_STATE && !node->links[0].subtree.ptr;
  }

  unsigned cost = node->error_cost;
  if (recovering) {
    cost += ERROR_COST_PER_RECOVERY;
  }
  return cost;
}
// Return how many nodes `version` has gained since its last recorded error.
// If the top node's count has dropped below the recorded baseline, the
// baseline is lowered to match, so the result is never negative.
unsigned ts_stack_node_count_since_error(const Stack *self, StackVersion version) {
  StackHead *head = array_get(&self->heads, version);
  unsigned current_count = head->node->node_count;

  if (current_count < head->node_count_at_last_error) {
    head->node_count_at_last_error = current_count;
  }
  return current_count - head->node_count_at_last_error;
}
// Push `subtree` and `state` onto `version` by creating a node linked to the
// current top node and making it the new top. The head's reference to the
// old top node and the caller's reference to `subtree` are handed over to the
// new link. Pushing a null subtree also resets the head's error baseline to
// the new node's count.
void ts_stack_push(
  Stack *self,
  StackVersion version,
  Subtree subtree,
  bool pending,
  TSStateId state
) {
  StackHead *head = array_get(&self->heads, version);
  StackNode *pushed = stack_node_new(head->node, subtree, pending, state, &self->node_pool);

  head->node = pushed;
  if (!subtree.ptr) {
    head->node_count_at_last_error = pushed->node_count;
  }
}
// Iteration callback for ts_stack_pop_count. `payload` points to the goal
// count; once an iterator has crossed exactly that many counted subtrees its
// path is popped and it stops.
forceinline StackAction pop_count_callback(void *payload, const StackIterator *iterator) {
  const unsigned goal_subtree_count = *(unsigned *)payload;
  if (iterator->subtree_count != goal_subtree_count) {
    return StackActionNone;
  }
  return StackActionPop | StackActionStop;
}
// Pop `count` counted subtrees from `version` along every path, returning
// one slice per path found. The result aliases the stack's slice buffer.
StackSliceArray ts_stack_pop_count(Stack *self, StackVersion version, uint32_t count) {
  StackSliceArray slices = stack__iter(self, version, pop_count_callback, &count, (int)count);
  return slices;
}
// Iteration callback for ts_stack_pop_pending. As soon as one counted
// subtree has been crossed the iterator stops; its path is popped only if
// the iterator is still marked pending.
forceinline StackAction pop_pending_callback(void *payload, const StackIterator *iterator) {
  (void)payload;
  if (iterator->subtree_count < 1) {
    return StackActionNone;
  }
  return iterator->is_pending
    ? (StackActionPop | StackActionStop)
    : StackActionStop;
}
// Pop a pending subtree from the top of `version`, if there is one. When a
// slice is produced, the version it revealed is renumbered to `version`, so
// the caller keeps working with the same index.
StackSliceArray ts_stack_pop_pending(Stack *self, StackVersion version) {
  StackSliceArray pop = stack__iter(self, version, pop_pending_callback, NULL, 0);
  if (pop.size == 0) {
    return pop;
  }

  StackSlice *first_slice = array_get(&pop, 0);
  ts_stack_renumber_version(self, first_slice->version, version);
  first_slice->version = version;
  return pop;
}
// Iteration callback for ts_stack_pop_error. `payload` points to a bool that
// records whether an error has been popped already. Once an iterator has
// collected a subtree it stops; its path is popped only if that first
// subtree is an error and no other iterator has popped one yet.
forceinline StackAction pop_error_callback(void *payload, const StackIterator *iterator) {
  if (iterator->subtrees.size == 0) {
    return StackActionNone;
  }

  bool *found_error = payload;
  if (*found_error || !ts_subtree_is_error(*array_get(&iterator->subtrees, 0))) {
    return StackActionStop;
  }

  *found_error = true;
  return StackActionPop | StackActionStop;
}
// If any link of the top node of `version` carries an error subtree, pop it:
// the revealed version is renumbered to `version` and the popped subtrees are
// returned. Otherwise (or if the pop yields nothing) an empty array is
// returned.
SubtreeArray ts_stack_pop_error(Stack *self, StackVersion version) {
  SubtreeArray no_subtrees = {.size = 0};
  StackNode *node = array_get(&self->heads, version)->node;

  for (unsigned i = 0; i < node->link_count; i++) {
    Subtree link_subtree = node->links[i].subtree;
    if (!link_subtree.ptr || !ts_subtree_is_error(link_subtree)) continue;

    // Only the first error link triggers a pop attempt.
    bool found_error = false;
    StackSliceArray pop = stack__iter(self, version, pop_error_callback, &found_error, 1);
    if (pop.size == 0) break;

    ts_assert(pop.size == 1);
    ts_stack_renumber_version(self, array_get(&pop, 0)->version, version);
    return array_get(&pop, 0)->subtrees;
  }

  return no_subtrees;
}
// Iteration callback for ts_stack_pop_all: a path is popped only when it
// reaches a node that has no links.
forceinline StackAction pop_all_callback(void *payload, const StackIterator *iterator) {
  (void)payload;
  if (iterator->node->link_count == 0) {
    return StackActionPop;
  }
  return StackActionNone;
}
// Pop every subtree from `version`, producing one slice per path that
// reaches a node without links. The result aliases the stack's slice buffer.
StackSliceArray ts_stack_pop_all(Stack *self, StackVersion version) {
  StackSliceArray slices = stack__iter(self, version, pop_all_callback, NULL, 0);
  return slices;
}
// Payload handed to summarize_stack_callback while a summary is recorded.
typedef struct {
// Summary being filled in.
StackSummary *summary;
// Iterators deeper than this many counted subtrees are stopped.
unsigned max_depth;
} SummarizeStackSession;
// Iteration callback for ts_stack_record_summary. Appends a summary entry
// (position, depth, state) for the iterator's node unless an entry with the
// same depth and state is already present. Iterators beyond the session's
// maximum depth are stopped.
forceinline StackAction summarize_stack_callback(void *payload, const StackIterator *iterator) {
  SummarizeStackSession *session = payload;
  const StackNode *node = iterator->node;
  TSStateId state = node->state;
  unsigned depth = iterator->subtree_count;

  if (depth > session->max_depth) return StackActionStop;

  // Scan the entries from newest to oldest; the scan ends at the first
  // entry that is shallower than the current depth.
  for (unsigned i = session->summary->size; i > 0; i--) {
    const StackSummaryEntry *entry = array_get(session->summary, i - 1);
    if (entry->depth < depth) break;
    if (entry->depth == depth && entry->state == state) return StackActionNone;
  }

  array_push(session->summary, ((StackSummaryEntry) {
    .position = node->position,
    .depth = depth,
    .state = state,
  }));
  return StackActionNone;
}
// Build a summary of the states found within `max_depth` counted subtrees of
// the top of `version` and store it on the head, replacing and freeing any
// summary stored earlier.
void ts_stack_record_summary(Stack *self, StackVersion version, unsigned max_depth) {
  StackSummary *summary = ts_malloc(sizeof(StackSummary));
  array_init(summary);

  SummarizeStackSession session = {
    .summary = summary,
    .max_depth = max_depth
  };
  // A negative goal count tells the iteration not to collect subtrees.
  stack__iter(self, version, summarize_stack_callback, &session, -1);

  StackHead *head = array_get(&self->heads, version);
  if (head->summary) {
    array_delete(head->summary);
    ts_free(head->summary);
  }
  head->summary = summary;
}
// Return the summary stored on `version`, or NULL if none is stored. The
// head keeps ownership.
StackSummary *ts_stack_get_summary(Stack *self, StackVersion version) {
  StackHead *head = array_get(&self->heads, version);
  return head->summary;
}
// Return the dynamic precedence accumulated on the top node of `version`.
int ts_stack_dynamic_precedence(Stack *self, StackVersion version) {
  StackHead *head = array_get(&self->heads, version);
  return head->node->dynamic_precedence;
}
// Report whether `version` has consumed any bytes since its last error.
//
// A version without error cost trivially qualifies. Otherwise the first link
// of each node is followed downwards: a subtree with a non-zero total byte
// size means progress; an empty, error-free subtree is skipped as long as the
// node's count is above the head's error baseline; anything else ends the
// search with `false`.
bool ts_stack_has_advanced_since_error(const Stack *self, StackVersion version) {
  const StackHead *head = array_get(&self->heads, version);
  const StackNode *node = head->node;
  if (node->error_cost == 0) return true;

  while (node && node->link_count > 0) {
    Subtree subtree = node->links[0].subtree;
    if (!subtree.ptr) break;
    if (ts_subtree_total_bytes(subtree) > 0) return true;

    bool may_skip =
      node->node_count > head->node_count_at_last_error &&
      ts_subtree_error_cost(subtree) == 0;
    if (!may_skip) break;

    node = node->links[0].node;
  }
  return false;
}
// Release everything owned by the head of `version` and remove it from the
// head array; later versions shift down by one index.
void ts_stack_remove_version(Stack *self, StackVersion version) {
  StackHead *doomed_head = array_get(&self->heads, version);
  stack_head_delete(doomed_head, &self->node_pool, self->subtree_pool);
  array_erase(&self->heads, version);
}
// Move the head at index `v1` into slot `v2` (which must be lower), releasing
// the head that occupied `v2` and erasing slot `v1`. If the old `v2` head has
// a summary and the `v1` head has none, the summary is carried over.
// Does nothing when the two indices are equal.
void ts_stack_renumber_version(Stack *self, StackVersion v1, StackVersion v2) {
  if (v1 == v2) return;
  ts_assert(v2 < v1);
  ts_assert((uint32_t)v1 < self->heads.size);

  StackHead *source_head = array_get(&self->heads, v1);
  StackHead *target_head = array_get(&self->heads, v2);

  // Hand the target's summary to the source before the target is deleted,
  // so it survives the overwrite below.
  bool carry_summary_over = target_head->summary && !source_head->summary;
  if (carry_summary_over) {
    source_head->summary = target_head->summary;
    target_head->summary = NULL;
  }

  stack_head_delete(target_head, &self->node_pool, self->subtree_pool);
  *target_head = *source_head;
  array_erase(&self->heads, v1);
}
// Exchange the heads stored at indices `v1` and `v2`.
void ts_stack_swap_versions(Stack *self, StackVersion v1, StackVersion v2) {
  StackHead *first = array_get(&self->heads, v1);
  StackHead *second = array_get(&self->heads, v2);

  StackHead saved_first = *first;
  *first = *second;
  *second = saved_first;
}
// Append a copy of the head of `version` and return the new version's index.
// The copy retains the shared top node and last external token, and starts
// without a summary.
StackVersion ts_stack_copy_version(Stack *self, StackVersion version) {
  ts_assert(version < self->heads.size);

  StackHead copy = *array_get(&self->heads, version);
  copy.summary = NULL;
  array_push(&self->heads, copy);

  stack_node_retain(copy.node);
  if (copy.last_external_token.ptr) {
    ts_subtree_retain(copy.last_external_token);
  }

  return self->heads.size - 1;
}
// Merge `version2` into `version1` when ts_stack_can_merge allows it: every
// link of version2's top node is added to version1's top node, and version2
// is then removed. Returns whether the merge took place.
bool ts_stack_merge(Stack *self, StackVersion version1, StackVersion version2) {
  if (!ts_stack_can_merge(self, version1, version2)) {
    return false;
  }

  StackHead *kept_head = array_get(&self->heads, version1);
  StackHead *merged_head = array_get(&self->heads, version2);

  for (uint32_t i = 0; i < merged_head->node->link_count; i++) {
    stack_node_add_link(kept_head->node, merged_head->node->links[i], self->subtree_pool);
  }

  // In the error state, the merged node count becomes the new baseline.
  if (kept_head->node->state == ERROR_STATE) {
    kept_head->node_count_at_last_error = kept_head->node->node_count;
  }

  ts_stack_remove_version(self, version2);
  return true;
}
// Two versions can be merged when both are active and their top nodes agree
// on state, byte position and error cost, and their last external tokens
// have equal scanner state.
bool ts_stack_can_merge(Stack *self, StackVersion version1, StackVersion version2) {
  StackHead *head1 = array_get(&self->heads, version1);
  StackHead *head2 = array_get(&self->heads, version2);

  if (head1->status != StackStatusActive) return false;
  if (head2->status != StackStatusActive) return false;

  const StackNode *top1 = head1->node;
  const StackNode *top2 = head2->node;
  if (top1->state != top2->state) return false;
  if (top1->position.bytes != top2->position.bytes) return false;
  if (top1->error_cost != top2->error_cost) return false;

  return ts_subtree_external_scanner_state_eq(head1->last_external_token, head2->last_external_token);
}
// Mark `version` as halted.
void ts_stack_halt(Stack *self, StackVersion version) {
  StackHead *head = array_get(&self->heads, version);
  head->status = StackStatusHalted;
}
// Mark `version` as paused, storing `lookahead` (without retaining it) for a
// later ts_stack_resume, and reset the error baseline to the current node
// count.
void ts_stack_pause(Stack *self, StackVersion version, Subtree lookahead) {
  StackHead *head = array_get(&self->heads, version);
  head->lookahead_when_paused = lookahead;
  head->node_count_at_last_error = head->node->node_count;
  head->status = StackStatusPaused;
}
// Whether `version` currently has the active status.
bool ts_stack_is_active(const Stack *self, StackVersion version) {
  const StackHead *head = array_get(&self->heads, version);
  return head->status == StackStatusActive;
}
// Whether `version` currently has the halted status.
bool ts_stack_is_halted(const Stack *self, StackVersion version) {
  const StackHead *head = array_get(&self->heads, version);
  return head->status == StackStatusHalted;
}
// Whether `version` currently has the paused status.
bool ts_stack_is_paused(const Stack *self, StackVersion version) {
  const StackHead *head = array_get(&self->heads, version);
  return head->status == StackStatusPaused;
}
// Reactivate a paused `version` and hand back the lookahead that was stored
// when it was paused; the head no longer references that lookahead
// afterwards. Asserts that the version is paused.
Subtree ts_stack_resume(Stack *self, StackVersion version) {
  StackHead *head = array_get(&self->heads, version);
  ts_assert(head->status == StackStatusPaused);

  Subtree stored_lookahead = head->lookahead_when_paused;
  head->lookahead_when_paused = NULL_SUBTREE;
  head->status = StackStatusActive;
  return stored_lookahead;
}
// Reset the stack to a single active version positioned at the base node,
// releasing every existing head.
void ts_stack_clear(Stack *self) {
  // Take the new head's reference to the base node first, so releasing the
  // old heads cannot drop its count to zero.
  stack_node_retain(self->base_node);

  for (uint32_t v = 0; v < self->heads.size; v++) {
    StackHead *old_head = array_get(&self->heads, v);
    stack_head_delete(old_head, &self->node_pool, self->subtree_pool);
  }
  array_clear(&self->heads);

  StackHead initial_head = {
    .node = self->base_node,
    .status = StackStatusActive,
    .last_external_token = NULL_SUBTREE,
    .lookahead_when_paused = NULL_SUBTREE,
  };
  array_push(&self->heads, initial_head);
}
// Write a Graphviz DOT description of the stack to `f` (stderr when `f` is
// NULL). Every non-halted head is drawn as an invisible node pointing at its
// top stack node, followed by every stack node reachable from those heads
// and the links between them. `language` is used to print the symbol name of
// each link's subtree. Always returns true.
//
// The stack's `iterators` array is used as scratch space and overwritten.
bool ts_stack_print_dot_graph(Stack *self, const TSLanguage *language, FILE *f) {
  array_reserve(&self->iterators, 32);
  if (!f) f = stderr;

  fprintf(f, "digraph stack {\n");
  fprintf(f, "rankdir=\"RL\";\n");
  fprintf(f, "edge [arrowhead=none]\n");

  Array(StackNode *) visited_nodes = array_new();

  // Emit one entry per head and seed the traversal with the head's node.
  array_clear(&self->iterators);
  for (uint32_t i = 0; i < self->heads.size; i++) {
    StackHead *head = array_get(&self->heads, i);
    if (head->status == StackStatusHalted) continue;

    fprintf(f, "node_head_%u [shape=none, label=\"\"]\n", i);
    fprintf(f, "node_head_%u -> node_%p [", i, (void *)head->node);

    if (head->status == StackStatusPaused) {
      fprintf(f, "color=red ");
    }
    fprintf(f,
      "label=%u, fontcolor=blue, weight=10000, labeltooltip=\"node_count: %u\nerror_cost: %u",
      i,
      ts_stack_node_count_since_error(self, i),
      ts_stack_error_cost(self, i)
    );

    if (head->summary) {
      fprintf(f, "\nsummary:");
      for (uint32_t j = 0; j < head->summary->size; j++) fprintf(f, " %u", array_get(head->summary, j)->state);
    }

    if (head->last_external_token.ptr) {
      const ExternalScannerState *state = &head->last_external_token.ptr->external_scanner_state;
      const char *data = ts_external_scanner_state_data(state);
      fprintf(f, "\nexternal_scanner_state:");
      // The state bytes are plain `char`. Go through `unsigned char` so that
      // bytes >= 0x80 are not sign-extended and printed as FFFFFFxx where
      // `char` is signed.
      for (uint32_t j = 0; j < state->length; j++) {
        fprintf(f, " %2X", (unsigned)(unsigned char)data[j]);
      }
    }

    fprintf(f, "\"]\n");
    array_push(&self->iterators, ((StackIterator) {
      .node = head->node
    }));
  }

  // Repeatedly advance every iterator by one node until a full pass finds
  // only nodes that were already printed.
  bool all_iterators_done = false;
  while (!all_iterators_done) {
    all_iterators_done = true;

    for (uint32_t i = 0; i < self->iterators.size; i++) {
      StackIterator iterator = *array_get(&self->iterators, i);
      StackNode *node = iterator.node;

      // Skip nodes that have been printed before.
      for (uint32_t j = 0; j < visited_nodes.size; j++) {
        if (*array_get(&visited_nodes, j) == node) {
          node = NULL;
          break;
        }
      }

      if (!node) continue;
      all_iterators_done = false;

      fprintf(f, "node_%p [", (void *)node);
      if (node->state == ERROR_STATE) {
        fprintf(f, "label=\"?\"");
      } else if (
        node->link_count == 1 &&
        node->links[0].subtree.ptr &&
        ts_subtree_extra(node->links[0].subtree)
      ) {
        fprintf(f, "shape=point margin=0 label=\"\"");
      } else {
        fprintf(f, "label=\"%d\"", node->state);
      }

      fprintf(
        f,
        " tooltip=\"position: %u,%u\nnode_count:%u\nerror_cost: %u\ndynamic_precedence: %d\"];\n",
        node->position.extent.row + 1,
        node->position.extent.column,
        node->node_count,
        node->error_cost,
        node->dynamic_precedence
      );

      for (int j = 0; j < node->link_count; j++) {
        StackLink link = node->links[j];
        fprintf(f, "node_%p -> node_%p [", (void *)node, (void *)link.node);
        if (link.is_pending) fprintf(f, "style=dashed ");
        if (link.subtree.ptr && ts_subtree_extra(link.subtree)) fprintf(f, "fontcolor=gray ");

        if (!link.subtree.ptr) {
          fprintf(f, "color=red");
        } else {
          fprintf(f, "label=\"");
          // Visible but unnamed subtrees are shown in single quotes.
          bool quoted = ts_subtree_visible(link.subtree) && !ts_subtree_named(link.subtree);
          if (quoted) fprintf(f, "'");
          ts_language_write_symbol_as_dot_string(language, f, ts_subtree_symbol(link.subtree));
          if (quoted) fprintf(f, "'");
          fprintf(f, "\"");
          fprintf(
            f,
            "labeltooltip=\"error_cost: %u\ndynamic_precedence: %" PRId32 "\"",
            ts_subtree_error_cost(link.subtree),
            ts_subtree_dynamic_precedence(link.subtree)
          );
        }

        fprintf(f, "];\n");

        // The current iterator follows link 0; every other link gets a new
        // iterator appended to the array.
        StackIterator *next_iterator;
        if (j == 0) {
          next_iterator = array_get(&self->iterators, i);
        } else {
          array_push(&self->iterators, iterator);
          next_iterator = array_back(&self->iterators);
        }
        next_iterator->node = link.node;
      }

      array_push(&visited_nodes, node);
    }
  }

  fprintf(f, "}\n");

  array_delete(&visited_nodes);
  return true;
}
#undef forceinline

View File

@ -0,0 +1,133 @@
#ifndef TREE_SITTER_PARSE_STACK_H_
#define TREE_SITTER_PARSE_STACK_H_
#ifdef __cplusplus
extern "C" {
#endif
#include "./array.h"
#include "./subtree.h"
#include <stdio.h>
// Opaque stack type; its definition is private to the implementation.
typedef struct Stack Stack;
// Index of one version (head) of the stack.
typedef unsigned StackVersion;
// Sentinel meaning "no version".
#define STACK_VERSION_NONE ((StackVersion)-1)
// Result of a pop along one path: the subtrees that were removed and the
// version that the pop revealed.
typedef struct {
SubtreeArray subtrees;
StackVersion version;
} StackSlice;
typedef Array(StackSlice) StackSliceArray;
// One entry of a stack summary: a parse state found `depth` counted subtrees
// below the top of a version, together with that node's position.
typedef struct {
Length position;
unsigned depth;
TSStateId state;
} StackSummaryEntry;
typedef Array(StackSummaryEntry) StackSummary;
// Create a stack.
Stack *ts_stack_new(SubtreePool *subtree_pool);
// Release the memory reserved for a given stack.
void ts_stack_delete(Stack *self);
// Get the stack's current number of versions.
uint32_t ts_stack_version_count(const Stack *self);
// Get the stack's current number of halted versions.
uint32_t ts_stack_halted_version_count(Stack *self);
// Get the state at the top of the given version of the stack. If the stack is
// empty, this returns the state of the base node, which ts_stack_new creates
// with state 1.
TSStateId ts_stack_state(const Stack *self, StackVersion version);
// Get the last external token associated with a given version of the stack.
Subtree ts_stack_last_external_token(const Stack *self, StackVersion version);
// Set the last external token associated with a given version of the stack.
void ts_stack_set_last_external_token(Stack *self, StackVersion version, Subtree token);
// Get the position of the given version of the stack within the document.
Length ts_stack_position(const Stack *, StackVersion);
// Push a tree and state onto the given version of the stack.
//
// This transfers ownership of the tree to the Stack. Callers that
// need to retain ownership of the tree for their own purposes should
// first retain the tree.
void ts_stack_push(Stack *self, StackVersion version, Subtree subtree, bool pending, TSStateId state);
// Pop the given number of entries from the given version of the stack. This
// operation can increase the number of stack versions by revealing multiple
// versions which had previously been merged. It returns an array that
// specifies the index of each revealed version and the trees that were
// removed from that version.
StackSliceArray ts_stack_pop_count(Stack *self, StackVersion version, uint32_t count);
// Remove an error at the top of the given version of the stack. Returns the
// removed subtrees, or an empty array if no error was removed.
SubtreeArray ts_stack_pop_error(Stack *self, StackVersion version);
// Remove any pending trees from the top of the given version of the stack.
StackSliceArray ts_stack_pop_pending(Stack *self, StackVersion version);
// Remove all trees from the given version of the stack.
StackSliceArray ts_stack_pop_all(Stack *self, StackVersion version);
// Get the maximum number of tree nodes reachable from this version of the stack
// since the last error was detected.
unsigned ts_stack_node_count_since_error(const Stack *self, StackVersion version);
// Get the dynamic precedence accumulated at the top of the given version of
// the stack.
int ts_stack_dynamic_precedence(Stack *self, StackVersion version);
// Determine whether the given version has no error cost, or has pushed a
// subtree with a non-zero byte size since its last error.
bool ts_stack_has_advanced_since_error(const Stack *self, StackVersion version);
// Compute a summary of all the parse states near the top of the given
// version of the stack and store the summary for later retrieval.
void ts_stack_record_summary(Stack *self, StackVersion version, unsigned max_depth);
// Retrieve a summary of all the parse states near the top of the
// given version of the stack.
StackSummary *ts_stack_get_summary(Stack *self, StackVersion version);
// Get the total cost of all errors on the given version of the stack.
unsigned ts_stack_error_cost(const Stack *self, StackVersion version);
// Merge the given two stack versions if possible, returning true
// if they were successfully merged and false otherwise.
bool ts_stack_merge(Stack *self, StackVersion version1, StackVersion version2);
// Determine whether the given two stack versions can be merged.
bool ts_stack_can_merge(Stack *self, StackVersion version1, StackVersion version2);
// Make a paused version active again and return the lookahead that was
// stored when it was paused.
Subtree ts_stack_resume(Stack *self, StackVersion version);
// Mark the given version as paused and store `lookahead` with it.
void ts_stack_pause(Stack *self, StackVersion version, Subtree lookahead);
// Mark the given version as halted.
void ts_stack_halt(Stack *self, StackVersion version);
// Query the status of the given version.
bool ts_stack_is_active(const Stack *self, StackVersion version);
bool ts_stack_is_paused(const Stack *self, StackVersion version);
bool ts_stack_is_halted(const Stack *self, StackVersion version);
// Move version `v1` into slot `v2`, discarding the version previously stored
// at `v2`. `v2` must be less than `v1`; equal indices are a no-op.
void ts_stack_renumber_version(Stack *self, StackVersion v1, StackVersion v2);
// Exchange the two given versions.
void ts_stack_swap_versions(Stack *, StackVersion v1, StackVersion v2);
// Append a copy of the given version and return the index of the copy.
StackVersion ts_stack_copy_version(Stack *self, StackVersion version);
// Remove the given version from the stack.
void ts_stack_remove_version(Stack *self, StackVersion version);
// Remove all versions, leaving a single active version at the base node.
void ts_stack_clear(Stack *self);
// Write a Graphviz DOT description of the stack to `f` (stderr if `f` is
// NULL). Always returns true.
bool ts_stack_print_dot_graph(Stack *self, const TSLanguage *language, FILE *f);
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_PARSE_STACK_H_

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,399 @@
#ifndef TREE_SITTER_SUBTREE_H_
#define TREE_SITTER_SUBTREE_H_
#ifdef __cplusplus
extern "C" {
#endif
#include <limits.h>
#include <stdbool.h>
#include <stdio.h>
#include "./length.h"
#include "./array.h"
#include "./error_costs.h"
#include "./host.h"
#include "tree_sitter/api.h"
#include "./parser.h"
// Sentinel parse-state value; presumably marks a subtree that has no
// associated parse state (confirm against its users).
#define TS_TREE_STATE_NONE USHRT_MAX
// A Subtree value whose pointer is NULL, used to mean "no subtree".
#define NULL_SUBTREE ((Subtree) {.ptr = NULL})
// The serialized state of an external scanner.
//
// Every time an external token subtree is created after a call to an
// external scanner, the scanner's `serialize` function is called to
// retrieve a serialized copy of its state. The bytes are then copied
// onto the subtree itself so that the scanner's state can later be
// restored using its `deserialize` function.
//
// Small byte arrays are stored inline, and long ones are allocated
// separately on the heap.
typedef struct {
union {
// Separately allocated storage, used for long states.
char *long_data;
// Inline storage, used for small states (24 bytes of capacity).
char short_data[24];
};
// Number of serialized bytes.
uint32_t length;
} ExternalScannerState;
// A compact representation of a subtree.
//
// This representation is used for small leaf nodes that are not
// errors, and were not created by an external scanner.
//
// The idea behind the layout of this struct is that the `is_inline`
// bit will fall exactly into the same location as the least significant
// bit of the pointer in `Subtree` or `MutableSubtree`, respectively.
// Because of alignment, for any valid pointer this will be 0, giving
// us the opportunity to make use of this bit to signify whether to use
// the pointer or the inline struct.
typedef struct SubtreeInlineData SubtreeInlineData;
// Six one-bit flags shared by every layout variant below.
#define SUBTREE_BITS \
bool visible : 1; \
bool named : 1; \
bool extra : 1; \
bool has_changes : 1; \
bool is_missing : 1; \
bool is_keyword : 1;
// Padding / size fields shared by every layout variant below. `padding_rows`
// and `lookahead_bytes` are 4-bit fields declared back to back; the other
// three members are full bytes.
#define SUBTREE_SIZE \
uint8_t padding_columns; \
uint8_t padding_rows : 4; \
uint8_t lookahead_bytes : 4; \
uint8_t padding_bytes; \
uint8_t size_bytes;
// The member order is chosen per target so that `is_inline` overlays the
// pointer's least significant bit (see the comment above): the big-endian
// variants declare it last among the flag bits, the little-endian variant
// declares it first.
#if TS_BIG_ENDIAN
#if TS_PTR_SIZE == 32
// Big-endian, 32-bit pointers: the flag bits follow `parse_state` and
// `symbol`, and the size fields come afterwards.
// NOTE(review): presumably this places the flags in the last byte the pointer overlays — confirm against the ABI.
struct SubtreeInlineData {
uint16_t parse_state;
uint8_t symbol;
SUBTREE_BITS
bool unused : 1;
bool is_inline : 1;
SUBTREE_SIZE
};
#else
// Big-endian, wider pointers: the size fields come first so the flag bits
// are the last members of the struct.
struct SubtreeInlineData {
SUBTREE_SIZE
uint16_t parse_state;
uint8_t symbol;
SUBTREE_BITS
bool unused : 1;
bool is_inline : 1;
};
#endif
#else
// Little-endian: `is_inline` is the very first bit-field of the struct.
struct SubtreeInlineData {
bool is_inline : 1;
SUBTREE_BITS
uint8_t symbol;
uint16_t parse_state;
SUBTREE_SIZE
};
#endif
// The two helper macros are only needed for the definitions above.
#undef SUBTREE_BITS
#undef SUBTREE_SIZE
// A heap-allocated representation of a subtree.
//
// This representation is used for parent nodes, external tokens,
// errors, and other leaf nodes whose data is too large to fit into
// the inline representation.
typedef struct {
// Reference count.
// NOTE(review): presumably adjusted by ts_subtree_retain / ts_subtree_release — confirm in subtree.c.
volatile uint32_t ref_count;
// Extent of the padding that precedes the node, and of the node itself
// (returned by ts_subtree_padding / ts_subtree_size for heap subtrees).
Length padding;
Length size;
uint32_t lookahead_bytes;
// Returned by ts_subtree_error_cost unless the subtree is flagged missing.
uint32_t error_cost;
// Number of children; also discriminates the union at the end of the struct.
uint32_t child_count;
TSSymbol symbol;
TSStateId parse_state;
// One-bit flags. `visible`, `named`, `extra`, `has_changes`, `is_missing`
// and `is_keyword` also exist in the inline representation; the remaining
// flags exist only here, and their accessors report false for inline subtrees.
bool visible : 1;
bool named : 1;
bool extra : 1;
bool fragile_left : 1;
bool fragile_right : 1;
bool has_changes : 1;
bool has_external_tokens : 1;
bool has_external_scanner_state_change : 1;
bool depends_on_column: 1;
bool is_missing : 1;
bool is_keyword : 1;
// Which member of this union is valid depends on `child_count`,
// `has_external_tokens` and `symbol`, as noted on each member.
union {
// Non-terminal subtrees (`child_count > 0`)
struct {
uint32_t visible_child_count;
uint32_t named_child_count;
uint32_t visible_descendant_count;
int32_t dynamic_precedence;
uint16_t repeat_depth;
uint16_t production_id;
// Symbol and parse state reported by ts_subtree_leaf_symbol and
// ts_subtree_leaf_parse_state when the subtree has children.
struct {
TSSymbol symbol;
TSStateId parse_state;
} first_leaf;
};
// External terminal subtrees (`child_count == 0 && has_external_tokens`)
ExternalScannerState external_scanner_state;
// Error terminal subtrees (`child_count == 0 && symbol == ts_builtin_sym_error`)
int32_t lookahead_char;
};
} SubtreeHeapData;
// The fundamental building block of a syntax tree.
//
// Holds either an inline value (`data`) or a pointer to heap data (`ptr`).
// The accessors in this header test `data.is_inline` before dereferencing
// `ptr`, so that bit decides which member is meaningful.
typedef union {
SubtreeInlineData data;
const SubtreeHeapData *ptr;
} Subtree;
// Like Subtree, but mutable.
typedef union {
SubtreeInlineData data;
SubtreeHeapData *ptr;
} MutableSubtree;
// Growable arrays of subtrees, built with the `Array` macro from array.h.
typedef Array(Subtree) SubtreeArray;
typedef Array(MutableSubtree) MutableSubtreeArray;
// Pool state passed to the allocation and release functions declared below.
// NOTE(review): presumably `free_trees` recycles released nodes and
// `tree_stack` is scratch space — confirm in subtree.c.
typedef struct {
MutableSubtreeArray free_trees;
MutableSubtreeArray tree_stack;
} SubtreePool;
// ExternalScannerState helpers.
void ts_external_scanner_state_init(ExternalScannerState *self, const char *data, unsigned length);
const char *ts_external_scanner_state_data(const ExternalScannerState *self);
bool ts_external_scanner_state_eq(const ExternalScannerState *self, const char *buffer, unsigned length);
void ts_external_scanner_state_delete(ExternalScannerState *self);

// SubtreeArray helpers.
void ts_subtree_array_copy(SubtreeArray self, SubtreeArray *dest);
void ts_subtree_array_clear(SubtreePool *pool, SubtreeArray *self);
void ts_subtree_array_delete(SubtreePool *pool, SubtreeArray *self);
void ts_subtree_array_remove_trailing_extras(SubtreeArray *self, SubtreeArray *destination);
void ts_subtree_array_reverse(SubtreeArray *self);

// SubtreePool lifecycle.
SubtreePool ts_subtree_pool_new(uint32_t capacity);
void ts_subtree_pool_delete(SubtreePool *self);

// Subtree constructors.
Subtree ts_subtree_new_leaf(
  SubtreePool *pool, TSSymbol symbol, Length padding, Length size,
  uint32_t lookahead_bytes, TSStateId parse_state,
  bool has_external_tokens, bool depends_on_column,
  bool is_keyword, const TSLanguage *language
);
Subtree ts_subtree_new_error(
  SubtreePool *pool, int32_t lookahead_char, Length padding, Length size,
  uint32_t bytes_scanned, TSStateId parse_state, const TSLanguage *language
);
// The second parameter was previously misspelled `chiildren`; it is named
// `children` here, matching `ts_subtree_new_error_node` below.
MutableSubtree ts_subtree_new_node(
  TSSymbol symbol,
  SubtreeArray *children,
  unsigned production_id,
  const TSLanguage *language
);
Subtree ts_subtree_new_error_node(
  SubtreeArray *children,
  bool extra,
  const TSLanguage *language
);
Subtree ts_subtree_new_missing_leaf(
  SubtreePool *pool,
  TSSymbol symbol,
  Length padding,
  uint32_t lookahead_bytes,
  const TSLanguage *language
);

// Ownership, comparison and mutation.
MutableSubtree ts_subtree_make_mut(SubtreePool *pool, Subtree self);
void ts_subtree_retain(Subtree self);
void ts_subtree_release(SubtreePool *pool, Subtree self);
int ts_subtree_compare(Subtree left, Subtree right, SubtreePool *pool);
void ts_subtree_set_symbol(MutableSubtree *self, TSSymbol symbol, const TSLanguage *language);
void ts_subtree_compress(MutableSubtree self, unsigned count, const TSLanguage *language, MutableSubtreeArray *stack);
void ts_subtree_summarize_children(MutableSubtree self, const TSLanguage *language);
Subtree ts_subtree_edit(Subtree self, const TSInputEdit *edit, SubtreePool *pool);

// Debug output.
char *ts_subtree_string(Subtree self, TSSymbol alias_symbol, bool alias_is_named, const TSLanguage *language, bool include_all);
void ts_subtree_print_dot_graph(Subtree self, const TSLanguage *language, FILE *f);

// External scanner state lookup.
Subtree ts_subtree_last_external_token(Subtree tree);
const ExternalScannerState *ts_subtree_external_scanner_state(Subtree self);
bool ts_subtree_external_scanner_state_eq(Subtree self, Subtree other);
// Accessors for the fields that exist in both representations. Each one
// reads the inline struct when `is_inline` is set and the heap data otherwise.
static inline TSSymbol ts_subtree_symbol(Subtree self) {
  if (self.data.is_inline) return self.data.symbol;
  return self.ptr->symbol;
}

static inline bool ts_subtree_visible(Subtree self) {
  if (self.data.is_inline) return self.data.visible;
  return self.ptr->visible;
}

static inline bool ts_subtree_named(Subtree self) {
  if (self.data.is_inline) return self.data.named;
  return self.ptr->named;
}

static inline bool ts_subtree_extra(Subtree self) {
  if (self.data.is_inline) return self.data.extra;
  return self.ptr->extra;
}

static inline bool ts_subtree_has_changes(Subtree self) {
  if (self.data.is_inline) return self.data.has_changes;
  return self.ptr->has_changes;
}

static inline bool ts_subtree_missing(Subtree self) {
  if (self.data.is_inline) return self.data.is_missing;
  return self.ptr->is_missing;
}

static inline bool ts_subtree_is_keyword(Subtree self) {
  if (self.data.is_inline) return self.data.is_keyword;
  return self.ptr->is_keyword;
}

static inline TSStateId ts_subtree_parse_state(Subtree self) {
  if (self.data.is_inline) return self.data.parse_state;
  return self.ptr->parse_state;
}

static inline uint32_t ts_subtree_lookahead_bytes(Subtree self) {
  if (self.data.is_inline) return self.data.lookahead_bytes;
  return self.ptr->lookahead_bytes;
}
// Compute how many bytes a heap-allocated subtree with `child_count`
// children occupies: one `Subtree` slot per child plus the heap data itself.
static inline size_t ts_subtree_alloc_size(uint32_t child_count) {
  size_t children_bytes = child_count * sizeof(Subtree);
  return children_bytes + sizeof(SubtreeHeapData);
}
// Get a subtree's children, which are allocated immediately before the
// tree's own heap data.
//
// Yields NULL for an inline subtree. Otherwise the result is the heap
// pointer reinterpreted as `Subtree *` and moved back by `child_count`
// elements. Being a macro, it evaluates `self` more than once, so the
// argument should be free of side effects.
#define ts_subtree_children(self) \
((self).data.is_inline ? NULL : (Subtree *)((self).ptr) - (self).ptr->child_count)
// Set the `extra` flag on whichever representation `self` currently uses.
static inline void ts_subtree_set_extra(MutableSubtree *self, bool is_extra) {
  if (!self->data.is_inline) {
    self->ptr->extra = is_extra;
    return;
  }
  self->data.extra = is_extra;
}
// Symbol of the subtree's first leaf: the subtree's own symbol when it has
// no children, otherwise the cached `first_leaf.symbol`.
static inline TSSymbol ts_subtree_leaf_symbol(Subtree self) {
  if (self.data.is_inline) {
    return self.data.symbol;
  }
  return self.ptr->child_count > 0
    ? self.ptr->first_leaf.symbol
    : self.ptr->symbol;
}
// Parse state of the subtree's first leaf: the subtree's own parse state
// when it has no children, otherwise the cached `first_leaf.parse_state`.
static inline TSStateId ts_subtree_leaf_parse_state(Subtree self) {
  if (self.data.is_inline) {
    return self.data.parse_state;
  }
  return self.ptr->child_count > 0
    ? self.ptr->first_leaf.parse_state
    : self.ptr->parse_state;
}
// Padding that precedes the subtree. Inline subtrees store it as three
// small integers, which are widened into a `Length` here.
static inline Length ts_subtree_padding(Subtree self) {
  if (!self.data.is_inline) {
    return self.ptr->padding;
  }
  Length inline_padding;
  inline_padding.bytes = self.data.padding_bytes;
  inline_padding.extent.row = self.data.padding_rows;
  inline_padding.extent.column = self.data.padding_columns;
  return inline_padding;
}
// Size of the subtree itself, excluding padding. An inline subtree is
// reported as `size_bytes` bytes on a single row, with a column extent
// equal to the byte count.
static inline Length ts_subtree_size(Subtree self) {
  if (!self.data.is_inline) {
    return self.ptr->size;
  }
  Length inline_size;
  inline_size.bytes = self.data.size_bytes;
  inline_size.extent.row = 0;
  inline_size.extent.column = self.data.size_bytes;
  return inline_size;
}
// Padding plus size of the subtree.
static inline Length ts_subtree_total_size(Subtree self) {
  Length leading = ts_subtree_padding(self);
  Length content = ts_subtree_size(self);
  return length_add(leading, content);
}

// Byte component of `ts_subtree_total_size`.
static inline uint32_t ts_subtree_total_bytes(Subtree self) {
  Length total = ts_subtree_total_size(self);
  return total.bytes;
}
// Number of children; inline subtrees never have any.
static inline uint32_t ts_subtree_child_count(Subtree self) {
  if (self.data.is_inline) return 0;
  return self.ptr->child_count;
}

// Stored `repeat_depth` of a heap subtree; 0 for inline subtrees.
static inline uint32_t ts_subtree_repeat_depth(Subtree self) {
  if (self.data.is_inline) return 0;
  return self.ptr->repeat_depth;
}
// Returns 1 when the subtree is a heap node that is neither named nor
// visible and has at least one child; 0 otherwise.
static inline uint32_t ts_subtree_is_repetition(Subtree self) {
  if (self.data.is_inline) return 0;
  const SubtreeHeapData *heap = self.ptr;
  bool hidden_and_anonymous = !(heap->named || heap->visible);
  return hidden_and_anonymous && heap->child_count != 0;
}
// Stored visible-descendant count; 0 for inline subtrees and for heap
// subtrees without children (for which the field is not valid).
static inline uint32_t ts_subtree_visible_descendant_count(Subtree self) {
  if (self.data.is_inline) return 0;
  if (self.ptr->child_count == 0) return 0;
  return self.ptr->visible_descendant_count;
}

// Stored visible-child count; 0 when the subtree has no children.
static inline uint32_t ts_subtree_visible_child_count(Subtree self) {
  return ts_subtree_child_count(self) == 0
    ? 0
    : self.ptr->visible_child_count;
}
// Error cost of the subtree. A missing subtree always costs
// ERROR_COST_PER_MISSING_TREE + ERROR_COST_PER_RECOVERY; otherwise inline
// subtrees cost 0 and heap subtrees report their stored cost.
static inline uint32_t ts_subtree_error_cost(Subtree self) {
  if (ts_subtree_missing(self)) {
    return ERROR_COST_PER_MISSING_TREE + ERROR_COST_PER_RECOVERY;
  }
  if (self.data.is_inline) {
    return 0;
  }
  return self.ptr->error_cost;
}
// Stored dynamic precedence; 0 for inline subtrees and childless heap subtrees.
static inline int32_t ts_subtree_dynamic_precedence(Subtree self) {
  if (self.data.is_inline) return 0;
  if (self.ptr->child_count == 0) return 0;
  return self.ptr->dynamic_precedence;
}

// Stored production id; 0 when the subtree has no children.
static inline uint16_t ts_subtree_production_id(Subtree self) {
  return ts_subtree_child_count(self) == 0
    ? 0
    : self.ptr->production_id;
}
// Accessors for flags that exist only in the heap representation. Every
// one of them reports false for an inline subtree.
static inline bool ts_subtree_fragile_left(Subtree self) {
  if (self.data.is_inline) return false;
  return self.ptr->fragile_left;
}

static inline bool ts_subtree_fragile_right(Subtree self) {
  if (self.data.is_inline) return false;
  return self.ptr->fragile_right;
}

static inline bool ts_subtree_has_external_tokens(Subtree self) {
  if (self.data.is_inline) return false;
  return self.ptr->has_external_tokens;
}

static inline bool ts_subtree_has_external_scanner_state_change(Subtree self) {
  if (self.data.is_inline) return false;
  return self.ptr->has_external_scanner_state_change;
}

static inline bool ts_subtree_depends_on_column(Subtree self) {
  if (self.data.is_inline) return false;
  return self.ptr->depends_on_column;
}

// True when either fragile flag is set.
static inline bool ts_subtree_is_fragile(Subtree self) {
  if (self.data.is_inline) return false;
  return self.ptr->fragile_left || self.ptr->fragile_right;
}
// True when the subtree's symbol is the builtin error symbol.
static inline bool ts_subtree_is_error(Subtree self) {
  TSSymbol symbol = ts_subtree_symbol(self);
  return symbol == ts_builtin_sym_error;
}

// True when the subtree's symbol is the builtin end-of-input symbol.
static inline bool ts_subtree_is_eof(Subtree self) {
  TSSymbol symbol = ts_subtree_symbol(self);
  return symbol == ts_builtin_sym_end;
}
// Reinterpret a mutable subtree as an immutable one by copying the
// `data` member of the union across.
static inline Subtree ts_subtree_from_mut(MutableSubtree self) {
  Subtree immutable_view;
  immutable_view.data = self.data;
  return immutable_view;
}

// Reinterpret an immutable subtree as a mutable one by copying the `data`
// member of the union across. Nothing is checked or copied-on-write here,
// hence "unsafe" in the name.
static inline MutableSubtree ts_subtree_to_mut_unsafe(Subtree self) {
  MutableSubtree mutable_view;
  mutable_view.data = self.data;
  return mutable_view;
}
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_SUBTREE_H_

View File

@ -0,0 +1,170 @@
#include "tree_sitter/api.h"
#include "./array.h"
#include "./get_changed_ranges.h"
#include "./length.h"
#include "./subtree.h"
#include "./tree_cursor.h"
#include "./tree.h"
// Allocate a new tree around `root`.
//
// - `root` is stored as-is; this function does not retain it (ts_tree_copy
//   retains the root itself before calling here).
// - `language` is stored via `ts_language_copy`.
// - `included_ranges` is copied into a freshly allocated array of
//   `included_range_count` elements, so the caller keeps ownership of its
//   own array.
TSTree *ts_tree_new(
  Subtree root, const TSLanguage *language,
  const TSRange *included_ranges, unsigned included_range_count
) {
  TSTree *result = ts_malloc(sizeof(TSTree));
  result->root = root;
  result->language = ts_language_copy(language);
  result->included_ranges = ts_calloc(included_range_count, sizeof(TSRange));
  // memcpy requires valid pointers even for a zero-byte copy, and with a
  // count of zero either the source or the freshly allocated destination
  // may be NULL, so skip the call entirely in that case.
  if (included_range_count > 0) {
    memcpy(result->included_ranges, included_ranges, included_range_count * sizeof(TSRange));
  }
  result->included_range_count = included_range_count;
  return result;
}
TSTree *ts_tree_copy(const TSTree *self) {
ts_subtree_retain(self->root);
return ts_tree_new(self->root, self->language, self->included_ranges, self->included_range_count);
}
// Release everything owned by the tree, then the tree itself.
// Passing NULL is a no-op.
void ts_tree_delete(TSTree *self) {
  if (self == NULL) return;

  // The root is released through a temporary pool created just for this call.
  SubtreePool scratch_pool = ts_subtree_pool_new(0);
  ts_subtree_release(&scratch_pool, self->root);
  ts_subtree_pool_delete(&scratch_pool);

  ts_language_delete(self->language);
  ts_free(self->included_ranges);
  ts_free(self);
}
TSNode ts_tree_root_node(const TSTree *self) {
return ts_node_new(self, &self->root, ts_subtree_padding(self->root), 0);
}
// Like ts_tree_root_node, but the node's position is shifted forward by
// the given byte / point offset.
TSNode ts_tree_root_node_with_offset(
  const TSTree *self,
  uint32_t offset_bytes,
  TSPoint offset_extent
) {
  Length shift;
  shift.bytes = offset_bytes;
  shift.extent = offset_extent;
  Length root_start = length_add(shift, ts_subtree_padding(self->root));
  return ts_node_new(self, &self->root, root_start, 0);
}
// Return the language stored on the tree. The pointer is returned as-is,
// without taking a new copy.
const TSLanguage *ts_tree_language(const TSTree *self) {
  const TSLanguage *language = self->language;
  return language;
}
// Apply a text edit to the tree: first adjust every included range so it
// refers to post-edit coordinates, then apply the edit to the root subtree.
//
// For each range, the end and the start are handled independently with the
// same rule:
//   - at or after the edit's old end: shift by the edit's size change;
//   - strictly inside the edited span: clamp to the edit's start;
//   - at or before the edit's start: leave untouched.
void ts_tree_edit(TSTree *self, const TSInputEdit *edit) {
for (unsigned i = 0; i < self->included_range_count; i++) {
TSRange *range = &self->included_ranges[i];
if (range->end_byte >= edit->old_end_byte) {
// An end of UINT32_MAX is left alone rather than shifted.
if (range->end_byte != UINT32_MAX) {
range->end_byte = edit->new_end_byte + (range->end_byte - edit->old_end_byte);
range->end_point = point_add(
edit->new_end_point,
point_sub(range->end_point, edit->old_end_point)
);
// The unsigned addition wrapped around: saturate to the maximum.
if (range->end_byte < edit->new_end_byte) {
range->end_byte = UINT32_MAX;
range->end_point = POINT_MAX;
}
}
} else if (range->end_byte > edit->start_byte) {
// The range ended inside the edited span.
range->end_byte = edit->start_byte;
range->end_point = edit->start_point;
}
if (range->start_byte >= edit->old_end_byte) {
range->start_byte = edit->new_end_byte + (range->start_byte - edit->old_end_byte);
range->start_point = point_add(
edit->new_end_point,
point_sub(range->start_point, edit->old_end_point)
);
// Same wrap-around check as for the end.
if (range->start_byte < edit->new_end_byte) {
range->start_byte = UINT32_MAX;
range->start_point = POINT_MAX;
}
} else if (range->start_byte > edit->start_byte) {
// The range started inside the edited span.
range->start_byte = edit->start_byte;
range->start_point = edit->start_point;
}
}
// The subtree edit is given a temporary pool that lives only for this call.
SubtreePool pool = ts_subtree_pool_new(0);
self->root = ts_subtree_edit(self->root, edit, &pool);
ts_subtree_pool_delete(&pool);
}
// Return a newly allocated copy of the tree's included ranges and store
// the number of ranges in `*length`. The copy is allocated with
// `ts_calloc` and belongs to the caller.
TSRange *ts_tree_included_ranges(const TSTree *self, uint32_t *length) {
  *length = self->included_range_count;
  TSRange *ranges = ts_calloc(self->included_range_count, sizeof(TSRange));
  // memcpy requires valid pointers even for a zero-byte copy, and a
  // zero-element allocation may legitimately be NULL, so skip the call
  // when there is nothing to copy.
  if (self->included_range_count > 0) {
    memcpy(ranges, self->included_ranges, self->included_range_count * sizeof(TSRange));
  }
  return ranges;
}
// Compute the ranges that differ between `old_tree` and `new_tree`.
// The resulting array is returned and its element count is stored in
// `*length`.
TSRange *ts_tree_get_changed_ranges(const TSTree *old_tree, const TSTree *new_tree, uint32_t *length) {
  // One cursor per tree, each positioned on that tree's root node.
  TreeCursor old_cursor = {NULL, array_new(), 0};
  TreeCursor new_cursor = {NULL, array_new(), 0};
  ts_tree_cursor_init(&old_cursor, ts_tree_root_node(old_tree));
  ts_tree_cursor_init(&new_cursor, ts_tree_root_node(new_tree));

  // Differences between the two trees' included ranges are collected
  // first and passed into the subtree comparison.
  TSRangeArray range_differences = array_new();
  ts_range_array_get_changed_ranges(
    old_tree->included_ranges, old_tree->included_range_count,
    new_tree->included_ranges, new_tree->included_range_count,
    &range_differences
  );

  TSRange *changed_ranges;
  *length = ts_subtree_get_changed_ranges(
    &old_tree->root, &new_tree->root, &old_cursor, &new_cursor,
    old_tree->language, &range_differences, &changed_ranges
  );

  // Release the temporary arrays.
  array_delete(&range_differences);
  array_delete(&old_cursor.stack);
  array_delete(&new_cursor.stack);
  return changed_ranges;
}
#ifdef _WIN32
#include <io.h>
#include <windows.h>
// Duplicate a Windows HANDLE within the current process and wrap the copy
// in a C runtime file descriptor. Returns -1 if the handle cannot be
// duplicated; otherwise returns whatever `_open_osfhandle` yields.
int _ts_dup(HANDLE handle) {
  HANDLE current_process = GetCurrentProcess();
  HANDLE duplicate;
  BOOL duplicated = DuplicateHandle(
    current_process, handle,
    current_process, &duplicate,
    0, FALSE, DUPLICATE_SAME_ACCESS
  );
  if (!duplicated) {
    return -1;
  }
  return _open_osfhandle((intptr_t)duplicate, 0);
}
// Write a DOT-graph rendering of the tree to the file behind `fd`.
//
// The descriptor is duplicated first, so closing the stream opened here
// leaves the caller's `fd` open. If the descriptor cannot be duplicated or
// a stream cannot be opened on the duplicate, nothing is written.
void ts_tree_print_dot_graph(const TSTree *self, int fd) {
  int duplicate_fd = _ts_dup((HANDLE)_get_osfhandle(fd));
  if (duplicate_fd < 0) return;

  FILE *file = _fdopen(duplicate_fd, "a");
  if (!file) {
    // No stream took ownership of the duplicate, so close it here.
    _close(duplicate_fd);
    return;
  }

  ts_subtree_print_dot_graph(self->root, self->language, file);
  fclose(file);
}
#elif !defined(__wasi__) // WASI doesn't support dup
#include <unistd.h>
// Duplicate a file descriptor with `dup`, returning its result unchanged
// (a new descriptor on success, -1 on failure).
int _ts_dup(int file_descriptor) {
  int duplicate = dup(file_descriptor);
  return duplicate;
}
// Write a DOT-graph rendering of the tree to `file_descriptor`.
//
// The descriptor is duplicated first, so closing the stream opened here
// leaves the caller's descriptor open. If the descriptor cannot be
// duplicated or a stream cannot be opened on the duplicate, nothing is
// written.
void ts_tree_print_dot_graph(const TSTree *self, int file_descriptor) {
  int duplicate_fd = _ts_dup(file_descriptor);
  if (duplicate_fd < 0) return;

  FILE *file = fdopen(duplicate_fd, "a");
  if (!file) {
    // No stream took ownership of the duplicate, so close it here.
    close(duplicate_fd);
    return;
  }

  ts_subtree_print_dot_graph(self->root, self->language, file);
  fclose(file);
}
#else
// Stub for targets without `dup` (WASI): does nothing. Both parameters are
// explicitly discarded to avoid unused-parameter warnings.
void ts_tree_print_dot_graph(const TSTree *self, int file_descriptor) {
  (void)file_descriptor;
  (void)self;
}
#endif

View File

@ -0,0 +1,31 @@
#ifndef TREE_SITTER_TREE_H_
#define TREE_SITTER_TREE_H_
#include "./subtree.h"
#ifdef __cplusplus
extern "C" {
#endif
// A (child, parent) pair of subtree pointers together with a position and
// an alias symbol.
// NOTE(review): nothing in the visible part of tree.c or tree_cursor.c
// references this type; its intended use should be confirmed before
// relying on it.
typedef struct {
const Subtree *child;
const Subtree *parent;
Length position;
TSSymbol alias_symbol;
} ParentCacheEntry;
// A parsed syntax tree.
struct TSTree {
// Root subtree; released by ts_tree_delete.
Subtree root;
// Language handle obtained with ts_language_copy in ts_tree_new and
// dropped with ts_language_delete in ts_tree_delete.
const TSLanguage *language;
// Array of `included_range_count` ranges, allocated by ts_tree_new as a
// private copy and freed by ts_tree_delete.
TSRange *included_ranges;
unsigned included_range_count;
};
// Allocate a tree around `root`, copying `included_ranges` into storage
// owned by the tree (defined in tree.c).
TSTree *ts_tree_new(Subtree root, const TSLanguage *language, const TSRange *included_ranges, unsigned included_range_count);
// Build a node handle for `subtree` within `tree`, at `position`, with the
// given alias symbol (0 is passed for the root node in tree.c).
// NOTE(review): the definition is not visible here.
TSNode ts_node_new(const TSTree *tree, const Subtree *subtree, Length position, TSSymbol alias);
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_TREE_H_

View File

@ -0,0 +1,716 @@
#include "tree_sitter/api.h"
#include "./tree_cursor.h"
#include "./language.h"
#include "./tree.h"
// State for stepping through the children of one parent subtree.
typedef struct {
// Subtree whose children are being iterated; NULL_SUBTREE when the
// current cursor entry has no children.
Subtree parent;
const TSTree *tree;
// Position recorded for the child at `child_index`.
Length position;
// Index of the child that the next step will yield.
uint32_t child_index;
// Like `child_index`, but only advanced past children that are not "extra".
uint32_t structural_child_index;
// Running descendant index recorded for the child at `child_index`.
uint32_t descendant_index;
// Alias sequence for the parent's production, or NULL; indexed by
// `structural_child_index`.
const TSSymbol *alias_sequence;
} CursorChildIterator;
// CursorChildIterator
// Decide whether the stack entry at `index` counts as a visible node.
//
// The bottom entry is always visible. Any other entry is visible when its
// subtree is visible, or when it is not an "extra" and the parent's
// production assigns it a non-zero alias.
static inline bool ts_tree_cursor_is_entry_visible(const TreeCursor *self, uint32_t index) {
  TreeCursorEntry *entry = array_get(&self->stack, index);
  if (index == 0) return true;
  if (ts_subtree_visible(*entry->subtree)) return true;
  if (ts_subtree_extra(*entry->subtree)) return false;

  TreeCursorEntry *parent_entry = array_get(&self->stack, index - 1);
  return ts_language_alias_at(
    self->tree->language,
    parent_entry->subtree->ptr->production_id,
    entry->structural_child_index
  );
}
static inline CursorChildIterator ts_tree_cursor_iterate_children(const TreeCursor *self) {
TreeCursorEntry *last_entry = array_back(&self->stack);
if (ts_subtree_child_count(*last_entry->subtree) == 0) {
return (CursorChildIterator) {NULL_SUBTREE, self->tree, length_zero(), 0, 0, 0, NULL};
}
const TSSymbol *alias_sequence = ts_language_alias_sequence(
self->tree->language,
last_entry->subtree->ptr->production_id
);
uint32_t descendant_index = last_entry->descendant_index;
if (ts_tree_cursor_is_entry_visible(self, self->stack.size - 1)) {
descendant_index += 1;
}
return (CursorChildIterator) {
.tree = self->tree,
.parent = *last_entry->subtree,
.position = last_entry->position,
.child_index = 0,
.structural_child_index = 0,
.descendant_index = descendant_index,
.alias_sequence = alias_sequence,
};
}
// Yield the child at the iterator's current index and advance to the next.
//
// On success, `*result` describes the yielded child (using the iterator's
// state from before the step), `*visible` tells whether that child is
// visible either by itself or through an alias, and true is returned.
// Returns false when the iterator has no parent or is past the last child.
static inline bool ts_tree_cursor_child_iterator_next(
  CursorChildIterator *self,
  TreeCursorEntry *result,
  bool *visible
) {
  if (!self->parent.ptr) return false;
  uint32_t child_count = self->parent.ptr->child_count;
  if (self->child_index == child_count) return false;

  const Subtree *children = ts_subtree_children(self->parent);
  const Subtree *current = &children[self->child_index];

  // Snapshot the iterator state for the child being yielded.
  *result = (TreeCursorEntry) {
    .subtree = current,
    .position = self->position,
    .child_index = self->child_index,
    .structural_child_index = self->structural_child_index,
    .descendant_index = self->descendant_index,
  };

  // Only non-extra children take part in aliasing and structural indexing.
  bool is_visible = ts_subtree_visible(*current);
  if (!ts_subtree_extra(*current)) {
    if (self->alias_sequence && self->alias_sequence[self->structural_child_index]) {
      is_visible = true;
    }
    self->structural_child_index++;
  }
  *visible = is_visible;

  // Skip over the child's visible descendants, plus the child itself if visible.
  self->descendant_index += ts_subtree_visible_descendant_count(*current);
  if (is_visible) {
    self->descendant_index += 1;
  }

  // Move past the child, and past the next child's padding if there is one.
  self->position = length_add(self->position, ts_subtree_size(*current));
  self->child_index++;
  if (self->child_index < child_count) {
    Subtree upcoming = children[self->child_index];
    self->position = length_add(self->position, ts_subtree_padding(upcoming));
  }
  return true;
}
// Compute the position `p` such that `p + b == a`.
//
// This is only possible when `a` is defined and `b` spans no rows; in any
// other case `LENGTH_UNDEFINED` is returned and the caller has to derive
// the position some other way.
static inline Length length_backtrack(Length a, Length b) {
  bool computable = !length_is_undefined(a) && b.extent.row == 0;
  if (!computable) {
    return LENGTH_UNDEFINED;
  }
  // Same row as `a`; only bytes and columns move back.
  Length earlier = a;
  earlier.bytes -= b.bytes;
  earlier.extent.column -= b.extent.column;
  return earlier;
}
// Yield the child at the iterator's current index and step back to the
// previous one. This is mostly `ts_tree_cursor_child_iterator_next` in
// reverse.
//
// On success, `*result` describes the yielded child (its `descendant_index`
// is left at zero), `*visible` tells whether that child is visible either by
// itself or through an alias, and true is returned. Positions are moved
// backwards with `length_backtrack`, so they can become undefined when a
// step crosses a row boundary.
//
// Returns false when the iterator has no parent or has already stepped
// before the first child.
static inline bool ts_tree_cursor_child_iterator_previous(
  CursorChildIterator *self,
  TreeCursorEntry *result,
  bool *visible
) {
  // Stepping back from child 0 wraps `child_index` around to UINT32_MAX,
  // which is never a valid index, so exhaustion is detected by a range check
  // against `child_count`. (Truncating the index to 8 bits and comparing
  // with -1 would also match the valid indices 255, 511, ... and stop
  // iteration early in nodes with 256 or more children.)
  if (!self->parent.ptr || self->child_index >= self->parent.ptr->child_count) return false;

  const Subtree *child = &ts_subtree_children(self->parent)[self->child_index];
  *result = (TreeCursorEntry) {
    .subtree = child,
    .position = self->position,
    .child_index = self->child_index,
    .structural_child_index = self->structural_child_index,
  };
  *visible = ts_subtree_visible(*child);
  bool extra = ts_subtree_extra(*child);

  // Move back over this child's padding.
  self->position = length_backtrack(self->position, ts_subtree_padding(*child));
  self->child_index--;

  if (!extra && self->alias_sequence) {
    *visible |= self->alias_sequence[self->structural_child_index];
    if (self->structural_child_index > 0) {
      self->structural_child_index--;
    }
  }

  // `child_index` wraps when stepping before the first child, so this
  // comparison also covers "there is no previous child".
  if (self->child_index < self->parent.ptr->child_count) {
    Subtree previous_child = ts_subtree_children(self->parent)[self->child_index];
    Length size = ts_subtree_size(previous_child);
    self->position = length_backtrack(self->position, size);
  }
  return true;
}
// TSTreeCursor - lifecycle
// Create a cursor positioned on `node`. The cursor starts zeroed and is
// then initialised through ts_tree_cursor_init.
TSTreeCursor ts_tree_cursor_new(TSNode node) {
  TSTreeCursor cursor = {NULL, NULL, {0, 0, 0}};
  ts_tree_cursor_init((TreeCursor *)&cursor, node);
  return cursor;
}
// Re-position an existing cursor on `node`, reusing its storage.
void ts_tree_cursor_reset(TSTreeCursor *_self, TSNode node) {
  TreeCursor *self = (TreeCursor *)_self;
  ts_tree_cursor_init(self, node);
}
// Point the cursor at `node`: remember the node's tree and the alias taken
// from `node.context[3]`, then replace the stack contents with a single
// entry for the node at its start position.
void ts_tree_cursor_init(TreeCursor *self, TSNode node) {
  TreeCursorEntry root_entry = {
    .subtree = (const Subtree *)node.id,
    .position = {
      ts_node_start_byte(node),
      ts_node_start_point(node)
    },
    .child_index = 0,
    .structural_child_index = 0,
    .descendant_index = 0,
  };

  self->tree = node.tree;
  self->root_alias_symbol = node.context[3];
  array_clear(&self->stack);
  array_push(&self->stack, root_entry);
}
// Free the cursor's entry stack. The cursor object itself is not freed.
void ts_tree_cursor_delete(TSTreeCursor *_self) {
  TreeCursor *cursor = (TreeCursor *)_self;
  array_delete(&cursor->stack);
}
// TSTreeCursor - walking the tree
// Push the first child that is either visible, or hidden but containing
// visible children, and report which of the two it was. Returns
// TreeCursorStepNone (leaving the cursor untouched) if there is no such child.
TreeCursorStep ts_tree_cursor_goto_first_child_internal(TSTreeCursor *_self) {
  TreeCursor *self = (TreeCursor *)_self;
  CursorChildIterator iterator = ts_tree_cursor_iterate_children(self);
  TreeCursorEntry child;
  bool is_visible;

  while (ts_tree_cursor_child_iterator_next(&iterator, &child, &is_visible)) {
    // Hidden children without visible content are skipped.
    if (!is_visible && ts_subtree_visible_child_count(*child.subtree) == 0) {
      continue;
    }
    array_push(&self->stack, child);
    return is_visible ? TreeCursorStepVisible : TreeCursorStepHidden;
  }
  return TreeCursorStepNone;
}
// Move to the first visible child, descending through hidden wrapper
// nodes as needed. Returns true if the cursor landed on a visible node.
bool ts_tree_cursor_goto_first_child(TSTreeCursor *self) {
  TreeCursorStep step;
  do {
    step = ts_tree_cursor_goto_first_child_internal(self);
  } while (step == TreeCursorStepHidden);
  return step == TreeCursorStepVisible;
}
// Push the last child that is either visible, or hidden but containing
// visible children, and report which of the two it was. Returns
// TreeCursorStepNone (leaving the cursor untouched) if there is no such child.
TreeCursorStep ts_tree_cursor_goto_last_child_internal(TSTreeCursor *_self) {
  TreeCursor *self = (TreeCursor *)_self;
  CursorChildIterator iterator = ts_tree_cursor_iterate_children(self);
  if (!iterator.parent.ptr || iterator.parent.ptr->child_count == 0) {
    return TreeCursorStepNone;
  }

  // Scan every child forwards, remembering the latest acceptable one.
  TreeCursorEntry chosen = {0};
  TreeCursorStep chosen_step = TreeCursorStepNone;
  TreeCursorEntry child;
  bool is_visible;
  while (ts_tree_cursor_child_iterator_next(&iterator, &child, &is_visible)) {
    TreeCursorStep step = is_visible
      ? TreeCursorStepVisible
      : (ts_subtree_visible_child_count(*child.subtree) > 0
          ? TreeCursorStepHidden
          : TreeCursorStepNone);
    if (step != TreeCursorStepNone) {
      chosen = child;
      chosen_step = step;
    }
  }

  if (!chosen.subtree) {
    return TreeCursorStepNone;
  }
  array_push(&self->stack, chosen);
  return chosen_step;
}
// Move to the last visible child, descending through hidden wrapper nodes
// as needed. Returns true if the cursor landed on a visible node.
bool ts_tree_cursor_goto_last_child(TSTreeCursor *self) {
  TreeCursorStep step;
  do {
    step = ts_tree_cursor_goto_last_child_internal(self);
  } while (step == TreeCursorStepHidden);
  return step == TreeCursorStepVisible;
}
// Move to the first visible child whose end lies beyond both `goal_byte`
// and `goal_point`, descending through hidden nodes as needed.
//
// Returns the index of that child among the visible children that were
// passed over, or -1 (with the cursor restored to its original depth) when
// no child qualifies.
static inline int64_t ts_tree_cursor_goto_first_child_for_byte_and_point(
  TSTreeCursor *_self,
  uint32_t goal_byte,
  TSPoint goal_point
) {
  TreeCursor *self = (TreeCursor *)_self;
  const uint32_t saved_depth = self->stack.size;
  uint32_t visible_index = 0;

  for (;;) {
    bool descended = false;
    bool is_visible;
    TreeCursorEntry child;
    CursorChildIterator iterator = ts_tree_cursor_iterate_children(self);

    while (ts_tree_cursor_child_iterator_next(&iterator, &child, &is_visible)) {
      Length child_end = length_add(child.position, ts_subtree_size(*child.subtree));
      uint32_t nested_visible_count = ts_subtree_visible_child_count(*child.subtree);
      bool reaches_goal =
        child_end.bytes > goal_byte &&
        point_gt(child_end.extent, goal_point);

      if (!reaches_goal) {
        // Count the visible nodes skipped on the way to the goal.
        visible_index += is_visible ? 1 : nested_visible_count;
        continue;
      }
      if (is_visible) {
        array_push(&self->stack, child);
        return visible_index;
      }
      if (nested_visible_count > 0) {
        // Hidden node with visible content: continue the search inside it.
        array_push(&self->stack, child);
        descended = true;
        break;
      }
    }

    if (!descended) break;
  }

  self->stack.size = saved_depth;
  return -1;
}
// Byte-only variant of the search: the point goal is POINT_ZERO.
int64_t ts_tree_cursor_goto_first_child_for_byte(TSTreeCursor *self, uint32_t goal_byte) {
  int64_t child_index =
    ts_tree_cursor_goto_first_child_for_byte_and_point(self, goal_byte, POINT_ZERO);
  return child_index;
}
// Point-only variant of the search: the byte goal is 0.
int64_t ts_tree_cursor_goto_first_child_for_point(TSTreeCursor *self, TSPoint goal_point) {
  int64_t child_index =
    ts_tree_cursor_goto_first_child_for_byte_and_point(self, 0, goal_point);
  return child_index;
}
// Move the cursor to an adjacent sibling in the direction implemented by
// `advance` (the "next" or "previous" child-iterator step function).
//
// Entries are popped off the stack one level at a time. At each level the
// siblings beyond the popped entry are scanned, and the first one that is
// visible, or hidden with visible children, is pushed. Returns
// TreeCursorStepVisible or TreeCursorStepHidden accordingly. If nothing is
// found, the stack is restored to its original depth and
// TreeCursorStepNone is returned.
TreeCursorStep ts_tree_cursor_goto_sibling_internal(
TSTreeCursor *_self,
bool (*advance)(CursorChildIterator *, TreeCursorEntry *, bool *)
) {
TreeCursor *self = (TreeCursor *)_self;
uint32_t initial_size = self->stack.size;
while (self->stack.size > 1) {
TreeCursorEntry entry = array_pop(&self->stack);
// Re-seat a child iterator of the new top entry on the popped entry.
CursorChildIterator iterator = ts_tree_cursor_iterate_children(self);
iterator.child_index = entry.child_index;
iterator.structural_child_index = entry.structural_child_index;
iterator.position = entry.position;
iterator.descendant_index = entry.descendant_index;
bool visible = false;
// The first step yields the popped entry itself: it moves the iterator
// past it and reports whether that entry is visible.
advance(&iterator, &entry, &visible);
// A visible entry popped from above the starting level is a visible
// ancestor; siblings are not searched for beyond it.
if (visible && self->stack.size + 1 < initial_size) break;
while (advance(&iterator, &entry, &visible)) {
if (visible) {
array_push(&self->stack, entry);
return TreeCursorStepVisible;
}
if (ts_subtree_visible_child_count(*entry.subtree)) {
array_push(&self->stack, entry);
return TreeCursorStepHidden;
}
}
}
// Nothing found: undo the pops. The popped entries are still in the
// array's storage, so restoring the size restores the cursor.
self->stack.size = initial_size;
return TreeCursorStepNone;
}
// Forward-direction sibling step: delegates to the generic sibling walker
// with the "next" child-iterator function.
TreeCursorStep ts_tree_cursor_goto_next_sibling_internal(TSTreeCursor *_self) {
  TreeCursorStep step = ts_tree_cursor_goto_sibling_internal(
    _self,
    ts_tree_cursor_child_iterator_next
  );
  return step;
}
// Move to the next sibling. When the sibling found is a hidden wrapper,
// the cursor continues down to its first visible child. Returns false if
// there is no next sibling.
bool ts_tree_cursor_goto_next_sibling(TSTreeCursor *self) {
  TreeCursorStep step = ts_tree_cursor_goto_next_sibling_internal(self);
  if (step == TreeCursorStepHidden) {
    ts_tree_cursor_goto_first_child(self);
    return true;
  }
  return step == TreeCursorStepVisible;
}
// Backward-direction sibling step.
//
// Walking backwards computes positions by subtraction, which yields an
// undefined position whenever a step crosses a row boundary. When that
// happens, the new entry's position is rebuilt by walking forwards from the
// parent's position over the preceding siblings.
TreeCursorStep ts_tree_cursor_goto_previous_sibling_internal(TSTreeCursor *_self) {
  TreeCursor *self = (TreeCursor *)_self;

  TreeCursorStep step = ts_tree_cursor_goto_sibling_internal(
    _self,
    ts_tree_cursor_child_iterator_previous
  );
  if (step == TreeCursorStepNone) {
    return step;
  }

  // Nothing to repair when backtracking produced a defined position.
  TreeCursorEntry *current = array_back(&self->stack);
  if (!length_is_undefined(current->position)) {
    return step;
  }

  // Rebuild the position from the parent entry.
  const TreeCursorEntry *parent = array_get(&self->stack, self->stack.size - 2);
  const Subtree *children = ts_subtree_children((*(parent->subtree)));
  uint32_t target_index = current->child_index;
  Length rebuilt = parent->position;

  if (target_index > 0) {
    // The first child's padding is skipped because its position should
    // match the parent's position.
    rebuilt = length_add(rebuilt, ts_subtree_size(children[0]));
    for (uint32_t i = 1; i < target_index; ++i) {
      rebuilt = length_add(rebuilt, ts_subtree_total_size(children[i]));
    }
    rebuilt = length_add(rebuilt, ts_subtree_padding(children[target_index]));
  }

  current->position = rebuilt;
  return step;
}
// Move to the previous sibling. When the sibling found is a hidden
// wrapper, the cursor continues down to its last visible child. Returns
// false if there is no previous sibling.
bool ts_tree_cursor_goto_previous_sibling(TSTreeCursor *self) {
  TreeCursorStep step = ts_tree_cursor_goto_previous_sibling_internal(self);
  if (step == TreeCursorStepHidden) {
    ts_tree_cursor_goto_last_child(self);
    return true;
  }
  return step == TreeCursorStepVisible;
}
// Move to the nearest visible ancestor by truncating the stack just above
// it. Returns false, leaving the cursor unchanged, when the cursor is
// already on its bottom entry.
bool ts_tree_cursor_goto_parent(TSTreeCursor *_self) {
  TreeCursor *self = (TreeCursor *)_self;
  // `depth` is the stack size the cursor would have after moving to the
  // candidate ancestor, so the candidate's index is `depth - 1`.
  for (unsigned depth = self->stack.size - 1; depth > 0; depth--) {
    unsigned ancestor_index = depth - 1;
    if (ts_tree_cursor_is_entry_visible(self, ancestor_index)) {
      self->stack.size = depth;
      return true;
    }
  }
  return false;
}
// Move the cursor to the node with the given descendant index.
//
// Phase one pops entries until the top entry's descendant range contains
// the goal, giving up once only the bottom entry is left and it does not
// contain the goal. Phase two descends child by child until a visible
// entry with exactly the goal index has been pushed, or no further
// descent is possible.
void ts_tree_cursor_goto_descendant(
  TSTreeCursor *_self,
  uint32_t goal_descendant_index
) {
  TreeCursor *self = (TreeCursor *)_self;

  // Phase one: ascend to the lowest ancestor that contains the goal.
  for (;;) {
    uint32_t top_index = self->stack.size - 1;
    TreeCursorEntry *top = array_get(&self->stack, top_index);
    uint32_t range_end =
      top->descendant_index +
      (ts_tree_cursor_is_entry_visible(self, top_index) ? 1 : 0) +
      ts_subtree_visible_descendant_count(*top->subtree);
    bool contains_goal =
      top->descendant_index <= goal_descendant_index &&
      goal_descendant_index < range_end;

    if (contains_goal) break;
    if (self->stack.size <= 1) return;
    self->stack.size--;
  }

  // Phase two: descend towards the goal.
  for (;;) {
    bool descended = false;
    bool is_visible;
    TreeCursorEntry child;
    CursorChildIterator iterator = ts_tree_cursor_iterate_children(self);
    if (iterator.descendant_index > goal_descendant_index) {
      return;
    }

    while (ts_tree_cursor_child_iterator_next(&iterator, &child, &is_visible)) {
      // After the step, the iterator holds the index just past this child.
      // Children that end at or before the goal are skipped.
      if (iterator.descendant_index <= goal_descendant_index) continue;

      array_push(&self->stack, child);
      if (is_visible && child.descendant_index == goal_descendant_index) {
        return;
      }
      descended = true;
      break;
    }

    if (!descended) return;
  }
}
// Descendant index recorded on the cursor's current (top) entry.
uint32_t ts_tree_cursor_current_descendant_index(const TSTreeCursor *_self) {
  const TreeCursor *cursor = (const TreeCursor *)_self;
  return array_back(&cursor->stack)->descendant_index;
}
// Build a node handle for the cursor's current entry.
//
// The alias is 0 for "extra" subtrees. For any other entry it comes from
// the parent's production when the entry has a parent on the stack, and
// from the cursor's root alias otherwise.
TSNode ts_tree_cursor_current_node(const TSTreeCursor *_self) {
  const TreeCursor *self = (const TreeCursor *)_self;
  TreeCursorEntry *current = array_back(&self->stack);

  TSSymbol alias_symbol = 0;
  if (!ts_subtree_extra(*current->subtree)) {
    if (self->stack.size > 1) {
      TreeCursorEntry *parent_entry = array_get(&self->stack, self->stack.size - 2);
      alias_symbol = ts_language_alias_at(
        self->tree->language,
        parent_entry->subtree->ptr->production_id,
        current->structural_child_index
      );
    } else {
      alias_symbol = self->root_alias_symbol;
    }
  }

  return ts_node_new(self->tree, current->subtree, current->position, alias_symbol);
}
// Private - Get various facts about the current node that are needed
// when executing tree queries.
//
// Outputs (all written through the pointer arguments):
//   field_id                                - field id of the current node, or 0 if none was found
//   has_later_siblings                      - a visible sibling (or a hidden sibling with visible
//                                             children) follows the current node
//   has_later_named_siblings                - same, restricted to named nodes
//   can_have_later_siblings_with_this_field - the parent's field map has an entry for `field_id`
//                                             at a higher structural child index
//   supertypes / supertype_count            - supertype symbols met while walking up; on entry
//                                             `*supertype_count` is the capacity of `supertypes`,
//                                             on return it is the number of symbols stored
void ts_tree_cursor_current_status(
const TSTreeCursor *_self,
TSFieldId *field_id,
bool *has_later_siblings,
bool *has_later_named_siblings,
bool *can_have_later_siblings_with_this_field,
TSSymbol *supertypes,
unsigned *supertype_count
) {
const TreeCursor *self = (const TreeCursor *)_self;
// The incoming count is the caller's array capacity; remember it before
// the output is reset to zero.
unsigned max_supertypes = *supertype_count;
*field_id = 0;
*supertype_count = 0;
*has_later_siblings = false;
*has_later_named_siblings = false;
*can_have_later_siblings_with_this_field = false;
// Walk up the tree, visiting the current node and its invisible ancestors,
// because fields can refer to nodes through invisible *wrapper* nodes.
// The loop stops at index 1, so the bottom stack entry is only ever
// inspected as a parent.
for (unsigned i = self->stack.size - 1; i > 0; i--) {
TreeCursorEntry *entry = array_get(&self->stack, i);
TreeCursorEntry *parent_entry = array_get(&self->stack, i - 1);
const TSSymbol *alias_sequence = ts_language_alias_sequence(
self->tree->language,
parent_entry->subtree->ptr->production_id
);
// Effective symbol of a child of `parent_entry`: the alias recorded for its
// structural child index when the subtree is not an extra and such an alias
// exists, otherwise the subtree's own symbol. Uses `alias_sequence` from
// the enclosing scope.
#define subtree_symbol(subtree, structural_child_index) \
(( \
!ts_subtree_extra(subtree) && \
alias_sequence && \
alias_sequence[structural_child_index] \
) ? \
alias_sequence[structural_child_index] : \
ts_subtree_symbol(subtree))
// Stop walking up when a visible ancestor is found.
TSSymbol entry_symbol = subtree_symbol(
*entry->subtree,
entry->structural_child_index
);
TSSymbolMetadata entry_metadata = ts_language_symbol_metadata(
self->tree->language,
entry_symbol
);
// The first iteration is the current node itself, which is always processed.
if (i != self->stack.size - 1 && entry_metadata.visible) break;
// Record any supertypes
if (entry_metadata.supertype && *supertype_count < max_supertypes) {
supertypes[*supertype_count] = entry_symbol;
(*supertype_count)++;
}
// Determine if the current node has later siblings.
// Once a later sibling has been found at some level, higher levels are not
// scanned again.
if (!*has_later_siblings) {
unsigned sibling_count = parent_entry->subtree->ptr->child_count;
// Structural index of the first sibling after `entry`: extras do not
// occupy a structural slot, so only advance past non-extra subtrees.
unsigned structural_child_index = entry->structural_child_index;
if (!ts_subtree_extra(*entry->subtree)) structural_child_index++;
for (unsigned j = entry->child_index + 1; j < sibling_count; j++) {
Subtree sibling = ts_subtree_children(*parent_entry->subtree)[j];
TSSymbolMetadata sibling_metadata = ts_language_symbol_metadata(
self->tree->language,
subtree_symbol(sibling, structural_child_index)
);
// A visible sibling counts directly; keep scanning until a named one is
// found (or one was already known).
if (sibling_metadata.visible) {
*has_later_siblings = true;
if (*has_later_named_siblings) break;
if (sibling_metadata.named) {
*has_later_named_siblings = true;
break;
}
// A hidden sibling counts through its visible / named children.
} else if (ts_subtree_visible_child_count(sibling) > 0) {
*has_later_siblings = true;
if (*has_later_named_siblings) break;
if (sibling.ptr->named_child_count > 0) {
*has_later_named_siblings = true;
break;
}
}
if (!ts_subtree_extra(sibling)) structural_child_index++;
}
}
#undef subtree_symbol
// Field lookups are skipped for extra subtrees.
if (!ts_subtree_extra(*entry->subtree)) {
const TSFieldMapEntry *field_map, *field_map_end;
ts_language_field_map(
self->tree->language,
parent_entry->subtree->ptr->production_id,
&field_map, &field_map_end
);
// Look for a field name associated with the current node.
// Only non-inherited entries at this exact structural index are accepted;
// the first match found while walking up wins.
if (!*field_id) {
for (const TSFieldMapEntry *map = field_map; map < field_map_end; map++) {
if (!map->inherited && map->child_index == entry->structural_child_index) {
*field_id = map->field_id;
break;
}
}
}
// Determine if the current node can have later siblings with the same field name.
if (*field_id) {
for (const TSFieldMapEntry *map = field_map; map < field_map_end; map++) {
if (
map->field_id == *field_id &&
map->child_index > entry->structural_child_index
) {
*can_have_later_siblings_with_this_field = true;
break;
}
}
}
}
}
}
// Return the depth of the cursor's current node: the number of stack
// entries above the bottom entry (indices 1 .. size-1) that are visible.
uint32_t ts_tree_cursor_current_depth(const TSTreeCursor *_self) {
  const TreeCursor *cursor = (const TreeCursor *)_self;
  uint32_t visible_entries = 0;
  unsigned index = 1;
  while (index < cursor->stack.size) {
    if (ts_tree_cursor_is_entry_visible(cursor, index)) {
      visible_entries += 1;
    }
    index += 1;
  }
  return visible_entries;
}
// Return the nearest visible ancestor of the cursor's current node.
//
// Stack entries below the current one are examined from the top down.
// The bottom entry (index 0) is always accepted; any other entry is
// accepted when it is aliased by its parent's production or when its
// subtree is itself visible. If the stack holds no entry below the current
// one, a node built from NULL tree/subtree pointers is returned.
TSNode ts_tree_cursor_parent_node(const TSTreeCursor *_self) {
  const TreeCursor *cursor = (const TreeCursor *)_self;
  int index = (int)cursor->stack.size - 2;

  while (index >= 0) {
    TreeCursorEntry *candidate = array_get(&cursor->stack, index);
    TSSymbol alias = 0;
    bool accepted;

    if (index == 0) {
      accepted = true;
    } else {
      TreeCursorEntry *owner = array_get(&cursor->stack, index - 1);
      alias = ts_language_alias_at(
        cursor->tree->language,
        owner->subtree->ptr->production_id,
        candidate->structural_child_index
      );
      accepted = (alias != 0) || ts_subtree_visible(*candidate->subtree);
    }

    if (accepted) {
      return ts_node_new(cursor->tree, candidate->subtree, candidate->position, alias);
    }
    index--;
  }

  return ts_node_new(NULL, NULL, length_zero(), 0);
}
// Return the field id under which the cursor's current node appears in its
// parent, or 0 when no field applies.
//
// The current node and its invisible ancestors are examined from the top
// of the stack downward. The search ends with 0 as soon as a visible
// ancestor or an extra subtree is reached; otherwise the first
// non-inherited field-map entry whose child index equals the entry's
// structural child index supplies the result.
TSFieldId ts_tree_cursor_current_field_id(const TSTreeCursor *_self) {
  const TreeCursor *cursor = (const TreeCursor *)_self;
  const unsigned top = cursor->stack.size - 1;

  for (unsigned level = top; level > 0; level--) {
    TreeCursorEntry *node = array_get(&cursor->stack, level);
    TreeCursorEntry *owner = array_get(&cursor->stack, level - 1);

    // Only the current node itself may be visible; a visible ancestor ends the walk.
    if (level != top && ts_tree_cursor_is_entry_visible(cursor, level)) {
      return 0;
    }
    if (ts_subtree_extra(*node->subtree)) {
      return 0;
    }

    const TSFieldMapEntry *map_begin, *map_end;
    ts_language_field_map(
      cursor->tree->language,
      owner->subtree->ptr->production_id,
      &map_begin, &map_end
    );

    const TSFieldMapEntry *item = map_begin;
    while (item < map_end) {
      if (!item->inherited && item->child_index == node->structural_child_index) {
        return item->field_id;
      }
      item++;
    }
  }

  return 0;
}
// Return the name of the field under which the cursor's current node
// appears, looked up in the language's field-name table, or NULL when the
// current node has no field (field id 0).
const char *ts_tree_cursor_current_field_name(const TSTreeCursor *_self) {
  TSFieldId field = ts_tree_cursor_current_field_id(_self);
  if (!field) {
    return NULL;
  }
  const TreeCursor *cursor = (const TreeCursor *)_self;
  return cursor->tree->language->field_names[field];
}
// Return a new cursor that duplicates `_cursor`: same tree, same root
// alias symbol, and a freshly initialized stack filled with a copy of the
// source cursor's entries.
TSTreeCursor ts_tree_cursor_copy(const TSTreeCursor *_cursor) {
  const TreeCursor *source = (const TreeCursor *)_cursor;
  TSTreeCursor result = {NULL, NULL, {0, 0}};
  TreeCursor *duplicate = (TreeCursor *)&result;

  duplicate->tree = source->tree;
  duplicate->root_alias_symbol = source->root_alias_symbol;

  array_init(&duplicate->stack);
  array_push_all(&duplicate->stack, &source->stack);

  return result;
}
// Make the existing cursor `_dst` mirror `_src`: copy the tree pointer and
// root alias symbol, then clear the destination stack and refill it with
// the source cursor's entries.
void ts_tree_cursor_reset_to(TSTreeCursor *_dst, const TSTreeCursor *_src) {
  const TreeCursor *source = (const TreeCursor *)_src;
  TreeCursor *target = (TreeCursor *)_dst;

  target->tree = source->tree;
  target->root_alias_symbol = source->root_alias_symbol;

  array_clear(&target->stack);
  array_push_all(&target->stack, &source->stack);
}

View File

@ -0,0 +1,48 @@
#ifndef TREE_SITTER_TREE_CURSOR_H_
#define TREE_SITTER_TREE_CURSOR_H_
#include "./subtree.h"
// One entry of a tree cursor's stack: a subtree plus its placement within
// its parent.
typedef struct {
// The subtree this entry refers to.
const Subtree *subtree;
// Position handed to ts_node_new() when a TSNode is built for this entry.
Length position;
// Index of this subtree among all of its parent's children.
uint32_t child_index;
// Index used for alias-sequence and field-map lookups in the parent's
// production; extra subtrees do not advance it.
uint32_t structural_child_index;
// NOTE(review): presumably the index of this node in a traversal of the
// tree's descendants - confirm against the cursor's goto_* functions.
uint32_t descendant_index;
} TreeCursorEntry;
// Internal representation of a tree cursor. The cursor functions obtain it
// by casting a public `TSTreeCursor *` to `TreeCursor *`, so the two layouts
// must stay compatible.
typedef struct {
// Tree being traversed.
const TSTree *tree;
// Stack of entries; the last element is the cursor's current node.
Array(TreeCursorEntry) stack;
// Alias applied to the current node when the stack holds a single entry.
TSSymbol root_alias_symbol;
} TreeCursor;
// Result type of the `*_internal` cursor movement functions declared below.
// NOTE(review): meanings inferred from the names - None = the cursor did
// not move, Hidden = it moved onto a hidden node, Visible = it moved onto a
// visible node. Confirm against the definitions in tree_cursor.c.
typedef enum {
TreeCursorStepNone,
TreeCursorStepHidden,
TreeCursorStepVisible,
} TreeCursorStep;
// NOTE(review): presumably (re)initializes `self` to point at `node`; the
// definition is not part of this header - confirm in tree_cursor.c.
void ts_tree_cursor_init(TreeCursor *self, TSNode node);
// Private - gathers the facts about the current node that tree queries need:
// its field id, whether later (named) siblings exist, whether a later
// sibling can carry the same field, and the supertypes met while walking up.
// On entry `*supertype_count` is the capacity of `supertypes`; on return it
// is the number of entries written.
void ts_tree_cursor_current_status(
const TSTreeCursor *_self,
TSFieldId *field_id,
bool *has_later_siblings,
bool *has_later_named_siblings,
bool *can_have_later_siblings_with_this_field,
TSSymbol *supertypes,
unsigned *supertype_count
);
// Movement primitives that report a TreeCursorStep instead of a bool.
// NOTE(review): presumably the building blocks of the public
// goto_first_child / goto_next_sibling functions - confirm in tree_cursor.c.
TreeCursorStep ts_tree_cursor_goto_first_child_internal(TSTreeCursor *_self);
TreeCursorStep ts_tree_cursor_goto_next_sibling_internal(TSTreeCursor *_self);
// Return (by value) the subtree referenced by the entry on top of the
// cursor's stack, i.e. the cursor's current node.
static inline Subtree ts_tree_cursor_current_subtree(const TSTreeCursor *_self) {
  const TreeCursor *cursor = (const TreeCursor *)_self;
  TreeCursorEntry *top = array_back(&cursor->stack);
  const Subtree *current = top->subtree;
  return *current;
}
// Returns the nearest visible ancestor of the cursor's current node, built
// from the entries below the top of the cursor's stack.
TSNode ts_tree_cursor_parent_node(const TSTreeCursor *_self);
#endif // TREE_SITTER_TREE_CURSOR_H_

View File

@ -0,0 +1,11 @@
#ifndef TREE_SITTER_ASSERT_H_
#define TREE_SITTER_ASSERT_H_
// ts_assert(e): assertion wrapper used throughout the library.
// With NDEBUG defined the check is disabled, but the expression is still
// evaluated and its result discarded via the cast to void, so any side
// effects in `e` are kept. Without NDEBUG it forwards to the standard
// assert().
#ifdef NDEBUG
#define ts_assert(e) ((void)(e))
#else
#include <assert.h>
#define ts_assert(e) assert(e)
#endif
#endif // TREE_SITTER_ASSERT_H_

View File

@ -0,0 +1,75 @@
#ifndef TREE_SITTER_UNICODE_H_
#define TREE_SITTER_UNICODE_H_
#ifdef __cplusplus
extern "C" {
#endif
#include <limits.h>
#include <stdint.h>
#define U_EXPORT
#define U_EXPORT2
#include "unicode/utf8.h"
#include "unicode/utf16.h"
#include "portable/endian.h"
// Decode the next code point from a little-endian UTF-16 buffer.
//   s      - pointer to 16-bit code units stored in little-endian byte order
//   i      - index of the unit to read; advanced past the decoded code point
//   length - number of 16-bit units in `s` (not bytes); requires i < length
//   c      - receives the code point; an unpaired surrogate is returned as-is
// Both the lead and the trail unit are converted to host byte order before
// they are classified or combined, so surrogate pairs also decode correctly
// on big-endian hosts. The index is incremented in a separate statement so
// that a multiply-evaluating le16toh() macro cannot advance it twice.
#define U16_NEXT_LE(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=le16toh((s)[(i)]); \
    ++(i); \
    if(U16_IS_LEAD(c)) { \
        uint16_t __c2; \
        if((i)!=(length) && U16_IS_TRAIL(__c2=le16toh((s)[(i)]))) { \
            ++(i); \
            (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
// Decode the next code point from a big-endian UTF-16 buffer.
//   s      - pointer to 16-bit code units stored in big-endian byte order
//   i      - index of the unit to read; advanced past the decoded code point
//   length - number of 16-bit units in `s` (not bytes); requires i < length
//   c      - receives the code point; an unpaired surrogate is returned as-is
// Both the lead and the trail unit are converted to host byte order before
// they are classified or combined. Without the conversion of the trail
// unit, a little-endian host would test the byte-swapped value against the
// trail-surrogate range and never combine a surrogate pair. The index is
// incremented in a separate statement so that a multiply-evaluating
// be16toh() macro cannot advance it twice.
#define U16_NEXT_BE(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    (c)=be16toh((s)[(i)]); \
    ++(i); \
    if(U16_IS_LEAD(c)) { \
        uint16_t __c2; \
        if((i)!=(length) && U16_IS_TRAIL(__c2=be16toh((s)[(i)]))) { \
            ++(i); \
            (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
// Code point value that signals a decoding failure; it is ICU's U_SENTINEL.
// NOTE(review): presumably this is what U8_NEXT stores into the code point
// for malformed input - confirm in unicode/utf8.h.
static const int32_t TS_DECODE_ERROR = U_SENTINEL;
// Decode one code point from the start of a UTF-8 buffer.
//   string     - input bytes
//   length     - number of bytes available in `string`
//   code_point - receives the value produced by U8_NEXT
// Returns the number of bytes consumed.
static inline uint32_t ts_decode_utf8(
  const uint8_t *string,
  uint32_t length,
  int32_t *code_point
) {
  uint32_t consumed = 0;
  int32_t decoded;
  U8_NEXT(string, consumed, length, decoded);
  *code_point = decoded;
  return consumed;
}
// Decode one code point from the start of a little-endian UTF-16 buffer.
//   string     - input bytes; must hold at least one complete 16-bit unit
//   length     - number of bytes available in `string`
//   code_point - receives the decoded code point (an unpaired surrogate is
//                returned unchanged)
// Returns the number of bytes consumed (2 or 4).
//
// U16_NEXT_LE bounds its look-ahead for a trail surrogate in 16-bit units,
// so the byte count is converted before it is passed on. Handing it the
// byte count directly let the macro read a trail unit past the end of the
// buffer when the buffer ended on a lead surrogate.
// NOTE(review): `length` is treated as a byte count, consistent with
// ts_decode_utf8 and with the byte count returned here - confirm against
// the callers.
static inline uint32_t ts_decode_utf16_le(
  const uint8_t *string,
  uint32_t length,
  int32_t *code_point
) {
  uint32_t unit_count = length / 2;
  uint32_t i = 0;
  U16_NEXT_LE(((const uint16_t *)string), i, unit_count, *code_point);
  return i * 2;
}
// Decode one code point from the start of a big-endian UTF-16 buffer.
//   string     - input bytes; must hold at least one complete 16-bit unit
//   length     - number of bytes available in `string`
//   code_point - receives the decoded code point (an unpaired surrogate is
//                returned unchanged)
// Returns the number of bytes consumed (2 or 4).
//
// U16_NEXT_BE bounds its look-ahead for a trail surrogate in 16-bit units,
// so the byte count is converted before it is passed on. Handing it the
// byte count directly let the macro read a trail unit past the end of the
// buffer when the buffer ended on a lead surrogate.
// NOTE(review): `length` is treated as a byte count, consistent with
// ts_decode_utf8 and with the byte count returned here - confirm against
// the callers.
static inline uint32_t ts_decode_utf16_be(
  const uint8_t *string,
  uint32_t length,
  int32_t *code_point
) {
  uint32_t unit_count = length / 2;
  uint32_t i = 0;
  U16_NEXT_BE(((const uint16_t *)string), i, unit_count, *code_point);
  return i * 2;
}
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_UNICODE_H_

View File

@ -0,0 +1 @@
552b01f61127d30d6589aa4bf99468224979b661

View File

@ -0,0 +1,414 @@
COPYRIGHT AND PERMISSION NOTICE (ICU 58 and later)
Copyright © 1991-2019 Unicode, Inc. All rights reserved.
Distributed under the Terms of Use in https://www.unicode.org/copyright.html.
Permission is hereby granted, free of charge, to any person obtaining
a copy of the Unicode data files and any associated documentation
(the "Data Files") or Unicode software and any associated documentation
(the "Software") to deal in the Data Files or Software
without restriction, including without limitation the rights to use,
copy, modify, merge, publish, distribute, and/or sell copies of
the Data Files or Software, and to permit persons to whom the Data Files
or Software are furnished to do so, provided that either
(a) this copyright and permission notice appear with all copies
of the Data Files or Software, or
(b) this copyright and permission notice appear in associated
Documentation.
THE DATA FILES AND SOFTWARE ARE PROVIDED "AS IS", WITHOUT WARRANTY OF
ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT OF THIRD PARTY RIGHTS.
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS
NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL
DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
PERFORMANCE OF THE DATA FILES OR SOFTWARE.
Except as contained in this notice, the name of a copyright holder
shall not be used in advertising or otherwise to promote the sale,
use or other dealings in these Data Files or Software without prior
written authorization of the copyright holder.
---------------------
Third-Party Software Licenses
This section contains third-party software notices and/or additional
terms for licensed third-party software components included within ICU
libraries.
1. ICU License - ICU 1.8.1 to ICU 57.1
COPYRIGHT AND PERMISSION NOTICE
Copyright (c) 1995-2016 International Business Machines Corporation and others
All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, and/or sell copies of the Software, and to permit persons
to whom the Software is furnished to do so, provided that the above
copyright notice(s) and this permission notice appear in all copies of
the Software and that both the above copyright notice(s) and this
permission notice appear in supporting documentation.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY
SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER
RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF
CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
Except as contained in this notice, the name of a copyright holder
shall not be used in advertising or otherwise to promote the sale, use
or other dealings in this Software without prior written authorization
of the copyright holder.
All trademarks and registered trademarks mentioned herein are the
property of their respective owners.
2. Chinese/Japanese Word Break Dictionary Data (cjdict.txt)
# The Google Chrome software developed by Google is licensed under
# the BSD license. Other software included in this distribution is
# provided under other licenses, as set forth below.
#
# The BSD License
# http://opensource.org/licenses/bsd-license.php
# Copyright (C) 2006-2008, Google Inc.
#
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
# Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following
# disclaimer in the documentation and/or other materials provided with
# the distribution.
# Neither the name of Google Inc. nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
# INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
# BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
#
# The word list in cjdict.txt are generated by combining three word lists
# listed below with further processing for compound word breaking. The
# frequency is generated with an iterative training against Google web
# corpora.
#
# * Libtabe (Chinese)
# - https://sourceforge.net/project/?group_id=1519
# - Its license terms and conditions are shown below.
#
# * IPADIC (Japanese)
# - http://chasen.aist-nara.ac.jp/chasen/distribution.html
# - Its license terms and conditions are shown below.
#
# ---------COPYING.libtabe ---- BEGIN--------------------
#
# /*
# * Copyright (c) 1999 TaBE Project.
# * Copyright (c) 1999 Pai-Hsiang Hsiao.
# * All rights reserved.
# *
# * Redistribution and use in source and binary forms, with or without
# * modification, are permitted provided that the following conditions
# * are met:
# *
# * . Redistributions of source code must retain the above copyright
# * notice, this list of conditions and the following disclaimer.
# * . Redistributions in binary form must reproduce the above copyright
# * notice, this list of conditions and the following disclaimer in
# * the documentation and/or other materials provided with the
# * distribution.
# * . Neither the name of the TaBE Project nor the names of its
# * contributors may be used to endorse or promote products derived
# * from this software without specific prior written permission.
# *
# * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# * REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
# * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
# * OF THE POSSIBILITY OF SUCH DAMAGE.
# */
#
# /*
# * Copyright (c) 1999 Computer Systems and Communication Lab,
# * Institute of Information Science, Academia
# * Sinica. All rights reserved.
# *
# * Redistribution and use in source and binary forms, with or without
# * modification, are permitted provided that the following conditions
# * are met:
# *
# * . Redistributions of source code must retain the above copyright
# * notice, this list of conditions and the following disclaimer.
# * . Redistributions in binary form must reproduce the above copyright
# * notice, this list of conditions and the following disclaimer in
# * the documentation and/or other materials provided with the
# * distribution.
# * . Neither the name of the Computer Systems and Communication Lab
# * nor the names of its contributors may be used to endorse or
# * promote products derived from this software without specific
# * prior written permission.
# *
# * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# * REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
# * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
# * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
# * OF THE POSSIBILITY OF SUCH DAMAGE.
# */
#
# Copyright 1996 Chih-Hao Tsai @ Beckman Institute,
# University of Illinois
# c-tsai4@uiuc.edu http://casper.beckman.uiuc.edu/~c-tsai4
#
# ---------------COPYING.libtabe-----END--------------------------------
#
#
# ---------------COPYING.ipadic-----BEGIN-------------------------------
#
# Copyright 2000, 2001, 2002, 2003 Nara Institute of Science
# and Technology. All Rights Reserved.
#
# Use, reproduction, and distribution of this software is permitted.
# Any copy of this software, whether in its original form or modified,
# must include both the above copyright notice and the following
# paragraphs.
#
# Nara Institute of Science and Technology (NAIST),
# the copyright holders, disclaims all warranties with regard to this
# software, including all implied warranties of merchantability and
# fitness, in no event shall NAIST be liable for
# any special, indirect or consequential damages or any damages
# whatsoever resulting from loss of use, data or profits, whether in an
# action of contract, negligence or other tortuous action, arising out
# of or in connection with the use or performance of this software.
#
# A large portion of the dictionary entries
# originate from ICOT Free Software. The following conditions for ICOT
# Free Software applies to the current dictionary as well.
#
# Each User may also freely distribute the Program, whether in its
# original form or modified, to any third party or parties, PROVIDED
# that the provisions of Section 3 ("NO WARRANTY") will ALWAYS appear
# on, or be attached to, the Program, which is distributed substantially
# in the same form as set out herein and that such intended
# distribution, if actually made, will neither violate or otherwise
# contravene any of the laws and regulations of the countries having
# jurisdiction over the User or the intended distribution itself.
#
# NO WARRANTY
#
# The program was produced on an experimental basis in the course of the
# research and development conducted during the project and is provided
# to users as so produced on an experimental basis. Accordingly, the
# program is provided without any warranty whatsoever, whether express,
# implied, statutory or otherwise. The term "warranty" used herein
# includes, but is not limited to, any warranty of the quality,
# performance, merchantability and fitness for a particular purpose of
# the program and the nonexistence of any infringement or violation of
# any right of any third party.
#
# Each user of the program will agree and understand, and be deemed to
# have agreed and understood, that there is no warranty whatsoever for
# the program and, accordingly, the entire risk arising from or
# otherwise connected with the program is assumed by the user.
#
# Therefore, neither ICOT, the copyright holder, or any other
# organization that participated in or was otherwise related to the
# development of the program and their respective officials, directors,
# officers and other employees shall be held liable for any and all
# damages, including, without limitation, general, special, incidental
# and consequential damages, arising out of or otherwise in connection
# with the use or inability to use the program or any product, material
# or result produced or otherwise obtained by using the program,
# regardless of whether they have been advised of, or otherwise had
# knowledge of, the possibility of such damages at any time during the
# project or thereafter. Each user will be deemed to have agreed to the
# foregoing by his or her commencement of use of the program. The term
# "use" as used herein includes, but is not limited to, the use,
# modification, copying and distribution of the program and the
# production of secondary products from the program.
#
# In the case where the program, whether in its original form or
# modified, was distributed or delivered to or received by a user from
# any person, organization or entity other than ICOT, unless it makes or
# grants independently of ICOT any specific warranty to the user in
# writing, such person, organization or entity, will also be exempted
# from and not be held liable to the user for any such damages as noted
# above as far as the program is concerned.
#
# ---------------COPYING.ipadic-----END----------------------------------
3. Lao Word Break Dictionary Data (laodict.txt)
# Copyright (c) 2013 International Business Machines Corporation
# and others. All Rights Reserved.
#
# Project: http://code.google.com/p/lao-dictionary/
# Dictionary: http://lao-dictionary.googlecode.com/git/Lao-Dictionary.txt
# License: http://lao-dictionary.googlecode.com/git/Lao-Dictionary-LICENSE.txt
# (copied below)
#
# This file is derived from the above dictionary, with slight
# modifications.
# ----------------------------------------------------------------------
# Copyright (C) 2013 Brian Eugene Wilson, Robert Martin Campbell.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification,
# are permitted provided that the following conditions are met:
#
#
# Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer. Redistributions in
# binary form must reproduce the above copyright notice, this list of
# conditions and the following disclaimer in the documentation and/or
# other materials provided with the distribution.
#
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
# COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
# INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
# OF THE POSSIBILITY OF SUCH DAMAGE.
# --------------------------------------------------------------------------
4. Burmese Word Break Dictionary Data (burmesedict.txt)
# Copyright (c) 2014 International Business Machines Corporation
# and others. All Rights Reserved.
#
# This list is part of a project hosted at:
# github.com/kanyawtech/myanmar-karen-word-lists
#
# --------------------------------------------------------------------------
# Copyright (c) 2013, LeRoy Benjamin Sharon
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met: Redistributions of source code must retain the above
# copyright notice, this list of conditions and the following
# disclaimer. Redistributions in binary form must reproduce the
# above copyright notice, this list of conditions and the following
# disclaimer in the documentation and/or other materials provided
# with the distribution.
#
# Neither the name Myanmar Karen Word Lists, nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
# CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
# INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
# MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS
# BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
# TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
# ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
# TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
# THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
# --------------------------------------------------------------------------
5. Time Zone Database
ICU uses the public domain data and code derived from Time Zone
Database for its time zone support. The ownership of the TZ database
is explained in BCP 175: Procedure for Maintaining the Time Zone
Database section 7.
# 7. Database Ownership
#
# The TZ database itself is not an IETF Contribution or an IETF
# document. Rather it is a pre-existing and regularly updated work
# that is in the public domain, and is intended to remain in the
# public domain. Therefore, BCPs 78 [RFC5378] and 79 [RFC3979] do
# not apply to the TZ Database or contributions that individuals make
# to it. Should any claims be made and substantiated against the TZ
# Database, the organization that is providing the IANA
# Considerations defined in this RFC, under the memorandum of
# understanding with the IETF, currently ICANN, may act in accordance
# with all competent court orders. No ownership claims will be made
# by ICANN or the IETF Trust on the database or the code. Any person
# making a contribution to the database or code waives all rights to
# future claims in that contribution or in the TZ Database.
6. Google double-conversion
Copyright 2006-2011, the V8 project authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following
disclaimer in the documentation and/or other materials provided
with the distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@ -0,0 +1,29 @@
# ICU Parts
This directory contains a small subset of files from the Unicode organization's [ICU repository](https://github.com/unicode-org/icu).
### License
The license for these files is contained in the `LICENSE` file within this directory.
### Contents
* Source files taken from the [`icu4c/source/common/unicode`](https://github.com/unicode-org/icu/tree/552b01f61127d30d6589aa4bf99468224979b661/icu4c/source/common/unicode) directory:
* `utf8.h`
* `utf16.h`
* `umachine.h`
* Empty source files that are referenced by the above source files, but whose original contents in `libicu` are not needed:
* `ptypes.h`
* `urename.h`
* `utf.h`
* `ICU_SHA` - File containing the Git SHA of the commit in the `icu` repository from which the files were obtained.
* `LICENSE` - The license file from the [`icu4c`](https://github.com/unicode-org/icu/tree/552b01f61127d30d6589aa4bf99468224979b661/icu4c) directory of the `icu` repository.
* `README.md` - This text file.
### Updating ICU
To incorporate changes from the upstream `icu` repository:
* Update `ICU_SHA` with the new Git SHA.
* Update `LICENSE` with the license text from the directory mentioned above.
* Update `utf8.h`, `utf16.h`, and `umachine.h` with their new contents in the `icu` repository.

View File

@ -0,0 +1 @@
// This file must exist in order for `utf8.h` and `utf16.h` to be used.

View File

@ -0,0 +1,448 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
******************************************************************************
*
* Copyright (C) 1999-2015, International Business Machines
* Corporation and others. All Rights Reserved.
*
******************************************************************************
* file name: umachine.h
* encoding: UTF-8
* tab size: 8 (not used)
* indentation:4
*
* created on: 1999sep13
* created by: Markus W. Scherer
*
* This file defines basic types and constants for ICU to be
* platform-independent. umachine.h and utf.h are included into
* utypes.h to provide all the general definitions for ICU.
* All of these definitions used to be in utypes.h before
* the UTF-handling macros made this unmaintainable.
*/
#ifndef __UMACHINE_H__
#define __UMACHINE_H__
/**
* \file
* \brief Basic types and constants for UTF
*
* <h2> Basic types and constants for UTF </h2>
* This file defines basic types and constants for utf.h to be
* platform-independent. umachine.h and utf.h are included into
* utypes.h to provide all the general definitions for ICU.
* All of these definitions used to be in utypes.h before
* the UTF-handling macros made this unmaintainable.
*
*/
/*==========================================================================*/
/* Include platform-dependent definitions */
/* which are contained in the platform-specific file platform.h */
/*==========================================================================*/
#include "unicode/ptypes.h" /* platform.h is included in ptypes.h */
/*
* ANSI C headers:
* stddef.h defines wchar_t
*/
#include <stddef.h>
/*==========================================================================*/
/* For C wrappers, we use the symbol U_STABLE. */
/* This works properly if the includer is C or C++. */
/* Functions are declared U_STABLE return-type U_EXPORT2 function-name()... */
/*==========================================================================*/
/**
* \def U_CFUNC
* This is used in a declaration of a library private ICU C function.
* @stable ICU 2.4
*/
/**
* \def U_CDECL_BEGIN
* This is used to begin a declaration of a library private ICU C API.
* @stable ICU 2.4
*/
/**
* \def U_CDECL_END
* This is used to end a declaration of a library private ICU C API
* @stable ICU 2.4
*/
/* Linkage helpers: plain C only needs "extern"; C++ must additionally request C linkage. */
#if !defined(__cplusplus)
# define U_CFUNC extern
# define U_CDECL_BEGIN
# define U_CDECL_END
#else
# define U_CFUNC extern "C"
# define U_CDECL_BEGIN extern "C" {
# define U_CDECL_END }
#endif
#if !defined(U_ATTRIBUTE_DEPRECATED)
/**
* \def U_ATTRIBUTE_DEPRECATED
* Compiler-specific "deprecated" marker, chosen here unless the build predefined it:
* the GCC attribute form when U_GCC_MAJOR_MINOR >= 302,
* the Visual C++ __declspec form when _MSC_VER >= 1400,
* and nothing at all for every other compiler.
* @internal
*/
# if U_GCC_MAJOR_MINOR >= 302
/* GCC specific attribute syntax */
#  define U_ATTRIBUTE_DEPRECATED __attribute__ ((deprecated))
# elif defined(_MSC_VER) && (_MSC_VER >= 1400)
/* Visual C++ specific attribute syntax */
#  define U_ATTRIBUTE_DEPRECATED __declspec(deprecated)
# else
/* No known syntax: the marker expands to nothing. */
#  define U_ATTRIBUTE_DEPRECATED
# endif
#endif
/** This is used to declare a function as a public ICU C API; expands to U_CFUNC followed by U_EXPORT. @stable ICU 2.0*/
#define U_CAPI U_CFUNC U_EXPORT
/** This is used to declare a function as a stable public ICU C API (expands to U_CAPI, nothing more) */
#define U_STABLE U_CAPI
/** This is used to declare a function as a draft public ICU C API (expands to U_CAPI, nothing more) */
#define U_DRAFT U_CAPI
/** This is used to declare a function as a deprecated public ICU C API; the only variant that appends U_ATTRIBUTE_DEPRECATED to U_CAPI */
#define U_DEPRECATED U_CAPI U_ATTRIBUTE_DEPRECATED
/** This is used to declare a function as an obsolete public ICU C API (expands to U_CAPI, nothing more) */
#define U_OBSOLETE U_CAPI
/** This is used to declare a function as an internal ICU C API (expands to U_CAPI, nothing more) */
#define U_INTERNAL U_CAPI
/**
* \def U_OVERRIDE
* Defined to the C++11 "override" keyword, unless the build has already defined U_OVERRIDE.
* Denotes a class or member which is an override of the base class.
* May result in an error if it is applied to something that is not an override.
* @internal
*/
#ifndef U_OVERRIDE
#define U_OVERRIDE override
#endif
/**
* \def U_FINAL
* Defined to the C++11 "final" keyword, unless the build has already defined U_FINAL.
* The definition is also emitted whenever U_IN_DOXYGEN is defined.
* Denotes a class or member which may not be overridden in subclasses.
* May result in an error if subclasses attempt to override.
* @internal
*/
#if !defined(U_FINAL) || defined(U_IN_DOXYGEN)
#define U_FINAL final
#endif
// Before ICU 65, function-like, multi-statement ICU macros were just defined as
// series of statements wrapped in { } blocks and the caller could choose to
// either treat them as if they were actual functions and end the invocation
// with a trailing ; creating an empty statement after the block or else omit
// this trailing ; using the knowledge that the macro would expand to { }.
//
// But doing so doesn't work well with macros that look like functions and
// compiler warnings about empty statements (ICU-20601) and ICU 65 therefore
// switches to the standard solution of wrapping such macros in do { } while.
//
// This will however break existing code that depends on being able to invoke
// these macros without a trailing ; so to be able to remain compatible with
// such code the wrapper is itself defined as macros so that it's possible to
// build ICU 65 and later with the old macro behaviour, like this:
//
// CPPFLAGS='-DUPRV_BLOCK_MACRO_BEGIN="" -DUPRV_BLOCK_MACRO_END=""'
// runConfigureICU ...
/**
* \def UPRV_BLOCK_MACRO_BEGIN
* Opening half of the wrapper placed around multi-statement macros.
* Defined as the "do" keyword by default; a build may predefine it
* (for example as empty) to get the pre-ICU 65 macro behaviour.
* @internal
*/
#ifndef UPRV_BLOCK_MACRO_BEGIN
#define UPRV_BLOCK_MACRO_BEGIN do
#endif
/**
* \def UPRV_BLOCK_MACRO_END
* Closing half of the wrapper placed around multi-statement macros.
* Defined as "while (0)" by default; a build may predefine it
* (for example as empty) to get the pre-ICU 65 macro behaviour.
*
* The literal 0 is used instead of the FALSE macro so that every macro built
* on this wrapper expands correctly regardless of whether, or how, FALSE is
* defined at the point of use.
* @internal
*/
#ifndef UPRV_BLOCK_MACRO_END
#define UPRV_BLOCK_MACRO_END while (0)
#endif
/*==========================================================================*/
/* limits for int32_t etc., like in POSIX inttypes.h */
/*==========================================================================*/
/* Fallback limits: each one is supplied only if no earlier header defined it. */
/* ---- 8 bit ---- */
#if !defined(INT8_MIN)
/** The smallest value an 8 bit signed integer can hold @stable ICU 2.0 */
# define INT8_MIN ((int8_t)(-128))
#endif
#if !defined(INT8_MAX)
/** The largest value an 8 bit signed integer can hold @stable ICU 2.0 */
# define INT8_MAX ((int8_t)(127))
#endif
#if !defined(UINT8_MAX)
/** The largest value an 8 bit unsigned integer can hold @stable ICU 2.0 */
# define UINT8_MAX ((uint8_t)(255U))
#endif
/* ---- 16 bit ---- */
#if !defined(INT16_MIN)
/** The smallest value a 16 bit signed integer can hold @stable ICU 2.0 */
# define INT16_MIN ((int16_t)(-32767-1))
#endif
#if !defined(INT16_MAX)
/** The largest value a 16 bit signed integer can hold @stable ICU 2.0 */
# define INT16_MAX ((int16_t)(32767))
#endif
#if !defined(UINT16_MAX)
/** The largest value a 16 bit unsigned integer can hold @stable ICU 2.0 */
# define UINT16_MAX ((uint16_t)(65535U))
#endif
/* ---- 32 bit ---- */
#if !defined(INT32_MIN)
/** The smallest value a 32 bit signed integer can hold @stable ICU 2.0 */
# define INT32_MIN ((int32_t)(-2147483647-1))
#endif
#if !defined(INT32_MAX)
/** The largest value a 32 bit signed integer can hold @stable ICU 2.0 */
# define INT32_MAX ((int32_t)(2147483647))
#endif
#if !defined(UINT32_MAX)
/** The largest value a 32 bit unsigned integer can hold @stable ICU 2.0 */
# define UINT32_MAX ((uint32_t)(4294967295U))
#endif
/* 64-bit constants. A build that flags int64_t as unavailable is rejected outright. */
#if !defined(U_INT64_T_UNAVAILABLE)
# if !defined(INT64_C)
/**
* Provides a platform independent way to specify a signed 64-bit integer constant.
* note: may be wrong for some 64 bit platforms - ensure your compiler provides INT64_C
* @stable ICU 2.8
*/
#  define INT64_C(c) c ## LL
# endif
# if !defined(UINT64_C)
/**
* Provides a platform independent way to specify an unsigned 64-bit integer constant.
* note: may be wrong for some 64 bit platforms - ensure your compiler provides UINT64_C
* @stable ICU 2.8
*/
#  define UINT64_C(c) c ## ULL
# endif
# if !defined(U_INT64_MIN)
/** The smallest value a 64 bit signed integer can hold @stable ICU 2.8 */
#  define U_INT64_MIN ((int64_t)(INT64_C(-9223372036854775807)-1))
# endif
# if !defined(U_INT64_MAX)
/** The largest value a 64 bit signed integer can hold @stable ICU 2.8 */
#  define U_INT64_MAX ((int64_t)(INT64_C(9223372036854775807)))
# endif
# if !defined(U_UINT64_MAX)
/** The largest value a 64 bit unsigned integer can hold @stable ICU 2.8 */
#  define U_UINT64_MAX ((uint64_t)(UINT64_C(18446744073709551615)))
# endif
#else
# error int64_t is required for decimal format and rule-based number format.
#endif
/*==========================================================================*/
/* Boolean data type */
/*==========================================================================*/
/** The ICU boolean type, a signed 8-bit integer @stable ICU 2.0 */
typedef int8_t UBool;
/* TRUE and FALSE are only supplied when no earlier header defined them. */
#if !defined(TRUE)
/** The TRUE value of a UBool @stable ICU 2.0 */
# define TRUE 1
#endif
#if !defined(FALSE)
/** The FALSE value of a UBool @stable ICU 2.0 */
# define FALSE 0
#endif
/*==========================================================================*/
/* Unicode data types */
/*==========================================================================*/
/* wchar_t-related definitions -------------------------------------------- */
/*
* \def U_WCHAR_IS_UTF16
* Defined if wchar_t uses UTF-16.
*
* @stable ICU 2.0
*/
/*
* \def U_WCHAR_IS_UTF32
* Defined if wchar_t uses UTF-32.
*
* @stable ICU 2.0
*/
/* Auto-detect the wchar_t encoding only when the build has not already defined either macro. */
/* At most one of the two macros is defined below; some configurations leave both undefined. */
#if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
/* Case 1: __STDC_ISO_10646__ is defined -- decide by U_SIZEOF_WCHAR_T alone (2 or 4; other sizes define nothing). */
# ifdef __STDC_ISO_10646__
# if (U_SIZEOF_WCHAR_T==2)
# define U_WCHAR_IS_UTF16
# elif (U_SIZEOF_WCHAR_T==4)
# define U_WCHAR_IS_UTF32
# endif
/* Case 2: __UCS2__ is defined -- UTF-16 only for U_PLATFORM in the range U_PF_OS390..U_PF_OS400 with a 2-byte wchar_t. */
# elif defined __UCS2__
# if (U_PF_OS390 <= U_PLATFORM && U_PLATFORM <= U_PF_OS400) && (U_SIZEOF_WCHAR_T==2)
# define U_WCHAR_IS_UTF16
# endif
/* Case 3: __UCS4__ is defined, or __UTF32__ on U_PF_OS400 -- UTF-32 when wchar_t is 4 bytes. */
# elif defined(__UCS4__) || (U_PLATFORM == U_PF_OS400 && defined(__UTF32__))
# if (U_SIZEOF_WCHAR_T==4)
# define U_WCHAR_IS_UTF32
# endif
/* Case 4: Darwin-based platform, or Linux-based platform with a 4-byte wchar_t -- UTF-32. */
# elif U_PLATFORM_IS_DARWIN_BASED || (U_SIZEOF_WCHAR_T==4 && U_PLATFORM_IS_LINUX_BASED)
# define U_WCHAR_IS_UTF32
/* Case 5: platform with the Win32 API -- UTF-16. */
# elif U_PLATFORM_HAS_WIN32_API
# define U_WCHAR_IS_UTF16
# endif
#endif
/* UChar and UChar32 definitions -------------------------------------------- */
/** Number of bytes in a UChar (one UTF-16 code unit); fixed at 2 and not configurable here. @stable ICU 2.0 */
#define U_SIZEOF_UCHAR 2
/**
* \def U_CHAR16_IS_TYPEDEF
* If 1, then char16_t is a typedef and not a real type (yet)
* @internal
*/
/* Always ends up defined, as either 1 or 0. */
/* Branch 1: C++ on AIX with U_CPLUSPLUS_VERSION below 11. */
#if (U_PLATFORM == U_PF_AIX) && defined(__cplusplus) &&(U_CPLUSPLUS_VERSION < 11)
// for AIX, uchar.h needs to be included
# include <uchar.h>
# define U_CHAR16_IS_TYPEDEF 1
/* Branch 2: Microsoft compilers with _MSC_VER below 1900. */
#elif defined(_MSC_VER) && (_MSC_VER < 1900)
// Versions of Visual Studio/MSVC below 2015 do not support char16_t as a real type,
// and instead use a typedef. https://msdn.microsoft.com/library/bb531344.aspx
# define U_CHAR16_IS_TYPEDEF 1
/* Everything else: char16_t is treated as a real, distinct type. */
#else
# define U_CHAR16_IS_TYPEDEF 0
#endif
/**
* \var UChar
*
* The base type for UTF-16 code units and pointers.
* Unsigned 16-bit integer.
* Starting with ICU 59, C++ API uses char16_t directly, while C API continues to use UChar.
*
* UChar is configurable by defining the macro UCHAR_TYPE
* on the preprocessor or compiler command line:
* -DUCHAR_TYPE=uint16_t or -DUCHAR_TYPE=wchar_t (if U_SIZEOF_WCHAR_T==2) etc.
* (The UCHAR_TYPE can also be \#defined earlier in this file, for outside the ICU library code.)
* This is for transitional use from application code that uses uint16_t or wchar_t for UTF-16.
*
* The default is UChar=char16_t.
*
* C++11 defines char16_t as bit-compatible with uint16_t, but as a distinct type.
*
* In C, char16_t is a simple typedef of uint_least16_t.
* ICU requires uint_least16_t=uint16_t for data memory mapping.
* On macOS, char16_t is not available because the uchar.h standard header is missing.
*
* @stable ICU 4.4
*/
/* Step 1: optional test hook. As shipped the "#if 1" branch is empty, so UCHAR_TYPE is left */
/* exactly as the build supplied it (defined or not); the #elif below is then never evaluated. */
#if 1
// #if 1 is normal. UChar defaults to char16_t in C++.
// For configuration testing of UChar=uint16_t temporarily change this to #if 0.
// The intltest Makefile #defines UCHAR_TYPE=char16_t,
// so we only #define it to uint16_t if it is undefined so far.
#elif !defined(UCHAR_TYPE)
# define UCHAR_TYPE uint16_t
#endif
/* Step 2: pick the typedef. The first matching branch wins: */
/*   1. any of the four *_IMPLEMENTATION macros defined -> char16_t, UCHAR_TYPE is ignored */
/*   2. UCHAR_TYPE defined                              -> UCHAR_TYPE */
/*   3. C++ translation unit                            -> char16_t */
/*   4. otherwise (plain C)                             -> uint16_t */
#if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || \
defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
// Inside the ICU library code, never configurable.
typedef char16_t UChar;
#elif defined(UCHAR_TYPE)
typedef UCHAR_TYPE UChar;
#elif defined(__cplusplus)
typedef char16_t UChar;
#else
typedef uint16_t UChar;
#endif
/**
* \var OldUChar
* Default ICU 58 definition of UChar.
* A base type for UTF-16 code units and pointers.
* Unsigned 16-bit integer.
*
* Define OldUChar to be wchar_t if that is 16 bits wide.
* If wchar_t is not 16 bits wide, then define OldUChar to be __CHAR16_TYPE__ if the compiler provides it, or else uint16_t.
*
* This makes the definition of OldUChar platform-dependent
* but allows direct string type compatibility with platforms with
* 16-bit wchar_t types.
*
* This is how UChar was defined in ICU 58, for transition convenience.
* Exception: ICU 58 UChar was defined to UCHAR_TYPE if that macro was defined.
* The current UChar responds to UCHAR_TYPE but OldUChar does not.
*
* @stable ICU 59
*/
/* Three-way choice, first match wins; UCHAR_TYPE is not consulted here. */
/* 1. wchar_t is 2 bytes wide: use wchar_t itself. */
#if U_SIZEOF_WCHAR_T==2
typedef wchar_t OldUChar;
/* 2. the compiler predefines __CHAR16_TYPE__: use that type. */
#elif defined(__CHAR16_TYPE__)
typedef __CHAR16_TYPE__ OldUChar;
/* 3. fallback: uint16_t. */
#else
typedef uint16_t OldUChar;
#endif
/**
* Define UChar32 as a type for single Unicode code points.
* UChar32 is a signed 32-bit integer (same as int32_t).
*
* The Unicode code point range is 0..0x10ffff.
* All other values (negative or >=0x110000) are illegal as Unicode code points.
* They may be used as sentinel values to indicate "done", "error"
* or similar non-code point conditions.
*
* Before ICU 2.4 (Jitterbug 2146), UChar32 was defined
* to be wchar_t if that is 32 bits wide (wchar_t may be signed or unsigned)
* or else to be uint32_t.
* That is, the definition of UChar32 was platform-dependent.
*
* @see U_SENTINEL
* @stable ICU 2.4
*/
/* Signed 32-bit on every platform, so negative sentinel values such as U_SENTINEL (-1) are representable. */
typedef int32_t UChar32;
/**
* This value is intended for sentinel values for APIs that
* (take or) return single code points (UChar32).
* It is outside of the Unicode code point range 0..0x10ffff.
*
* For example, a "done" or "error" value in a new API
* could be indicated with U_SENTINEL.
*
* ICU APIs designed before ICU 2.4 usually define service-specific "done"
* values, mostly 0xffff.
* Those may need to be distinguished from
* actual U+ffff text contents by calling functions like
* CharacterIterator::hasNext() or UnicodeString::length().
*
* @return -1
* @see UChar32
* @stable ICU 2.4
*/
/* Parenthesized so the minus sign stays attached to the 1 wherever the macro is expanded. */
#define U_SENTINEL (-1)
#include "unicode/urename.h"
#endif

View File

@ -0,0 +1 @@
// This file must exist in order for `utf8.h` and `utf16.h` to be used.

View File

@ -0,0 +1 @@
// This file must exist in order for `utf8.h` and `utf16.h` to be used.

View File

@ -0,0 +1,733 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
* Copyright (C) 1999-2012, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: utf16.h
* encoding: UTF-8
* tab size: 8 (not used)
* indentation:4
*
* created on: 1999sep09
* created by: Markus W. Scherer
*/
/**
* \file
* \brief C API: 16-bit Unicode handling macros
*
* This file defines macros to deal with 16-bit Unicode (UTF-16) code units and strings.
*
* For more information see utf.h and the ICU User Guide Strings chapter
* (http://userguide.icu-project.org/strings).
*
* <em>Usage:</em>
* ICU coding guidelines for if() statements should be followed when using these macros.
* Compound statements (curly braces {}) must be used for if-else-while...
* bodies and all macro statements should be terminated with semicolon.
*/
#ifndef __UTF16_H__
#define __UTF16_H__
#include "unicode/umachine.h"
#ifndef __UTF_H__
# include "unicode/utf.h"
#endif
/* single-code point definitions -------------------------------------------- */
/**
* Does this code unit alone encode a code point (BMP, not a surrogate)?
* @param c 16-bit code unit
* @return TRUE or FALSE
* @stable ICU 2.4
*/
/* Logical negation of U_IS_SURROGATE(c); that macro is not defined in this header */
/* (presumably it comes from unicode/utf.h, included above -- confirm). */
#define U16_IS_SINGLE(c) !U_IS_SURROGATE(c)
/**
* Is this code unit a lead surrogate (U+d800..U+dbff)?
* @param c 16-bit code unit
* @return TRUE or FALSE
* @stable ICU 2.4
*/
/* Clearing the low 10 bits must leave exactly 0xd800; the wide mask also rejects values above 0xffff. */
#define U16_IS_LEAD(c) (0xd800==((c)&0xfffffc00))
/**
* Is this code unit a trail surrogate (U+dc00..U+dfff)?
* @param c 16-bit code unit
* @return TRUE or FALSE
* @stable ICU 2.4
*/
/* Clearing the low 10 bits must leave exactly 0xdc00; the wide mask also rejects values above 0xffff. */
#define U16_IS_TRAIL(c) (0xdc00==((c)&0xfffffc00))
/**
* Is this code unit a surrogate (U+d800..U+dfff)?
* @param c 16-bit code unit
* @return TRUE or FALSE
* @stable ICU 2.4
*/
/* Plain alias of U_IS_SURROGATE(c); that macro is not defined in this header */
/* (presumably it comes from unicode/utf.h, included above -- confirm). */
#define U16_IS_SURROGATE(c) U_IS_SURROGATE(c)
/**
* Assuming c is a surrogate code point (U16_IS_SURROGATE(c)),
* is it a lead surrogate?
* @param c 16-bit code unit
* @return TRUE or FALSE
* @stable ICU 2.4
*/
/* Only bit 0x400 is examined: clear means lead. Meaningful only for values already known to be surrogates. */
#define U16_IS_SURROGATE_LEAD(c) (!((c)&0x400))
/**
* Assuming c is a surrogate code point (U16_IS_SURROGATE(c)),
* is it a trail surrogate?
* @param c 16-bit code unit
* @return TRUE or FALSE
* @stable ICU 4.2
*/
/* Only bit 0x400 is examined: set means trail. Meaningful only for values already known to be surrogates. */
#define U16_IS_SURROGATE_TRAIL(c) (0!=((c)&0x400))
/**
* Helper constant for U16_GET_SUPPLEMENTARY.
* @internal
*/
/* Folds the lead base (0xd800<<10), the trail base (0xdc00) and the 0x10000 supplementary start into one constant. */
#define U16_SURROGATE_OFFSET (0xdc00-0x10000+(0xd800<<10UL))
/**
* Get a supplementary code point value (U+10000..U+10ffff)
* from its lead and trail surrogates.
* The result is undefined if the input values are not
* lead and trail surrogates.
*
* @param lead lead surrogate (U+d800..U+dbff)
* @param trail trail surrogate (U+dc00..U+dfff)
* @return supplementary code point (U+10000..U+10ffff)
* @stable ICU 2.4
*/
/* Computes (lead<<10) + trail - U16_SURROGATE_OFFSET in UChar32 arithmetic. */
/* The offset constant removes both surrogate bases and adds back the 0x10000 start, */
/* e.g. lead 0xd800 with trail 0xdc00 yields 0x10000. Each argument is evaluated once. */
#define U16_GET_SUPPLEMENTARY(lead, trail) \
(((UChar32)(lead)<<10UL)+(UChar32)(trail)-U16_SURROGATE_OFFSET)
/**
* Get the lead surrogate (0xd800..0xdbff) for a
* supplementary code point (0x10000..0x10ffff).
* @param supplementary 32-bit code point (U+10000..U+10ffff)
* @return lead surrogate (U+d800..U+dbff) for supplementary
* @stable ICU 2.4
*/
/* (supplementary>>10) + 0xd7c0, where 0xd7c0 is 0xd800 - (0x10000>>10). */
/* The whole cast expression is parenthesized so the macro behaves as a single */
/* operand wherever it is expanded (for example directly after sizeof). */
#define U16_LEAD(supplementary) ((UChar)(((supplementary)>>10)+0xd7c0))
/**
* Get the trail surrogate (0xdc00..0xdfff) for a
* supplementary code point (0x10000..0x10ffff).
* @param supplementary 32-bit code point (U+10000..U+10ffff)
* @return trail surrogate (U+dc00..U+dfff) for supplementary
* @stable ICU 2.4
*/
/* Low 10 bits of the code point OR-ed onto the trail base 0xdc00. */
/* The whole cast expression is parenthesized so the macro behaves as a single */
/* operand wherever it is expanded (for example directly after sizeof). */
#define U16_TRAIL(supplementary) ((UChar)(((supplementary)&0x3ff)|0xdc00))
/**
* How many 16-bit code units are used to encode this Unicode code point? (1 or 2)
* The result is not defined if c is not a Unicode code point (U+0000..U+10ffff).
* @param c 32-bit code point
* @return 1 or 2
* @stable ICU 2.4
*/
/* Anything above 0xffff (compared as unsigned 32-bit) needs a surrogate pair, everything else one unit. */
#define U16_LENGTH(c) (0xffff<(uint32_t)(c) ? 2 : 1)
/**
* The maximum number of 16-bit code units per Unicode code point (U+0000..U+10ffff).
* @return 2
* @stable ICU 2.4
*/
/* Equals the larger of the two values U16_LENGTH can produce. */
#define U16_MAX_LENGTH 2
/**
* Get a code point from a string at a random-access offset,
* without changing the offset.
* "Unsafe" macro, assumes well-formed UTF-16.
*
* The offset may point to either the lead or trail surrogate unit
* for a supplementary code point, in which case the macro will read
* the adjacent matching surrogate as well.
* The result is undefined if the offset points to a single, unpaired surrogate.
* Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
*
* @param s const UChar * string
* @param i string offset
* @param c output UChar32 variable
* @see U16_GET
* @stable ICU 2.4
*/
#define U16_GET_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
(c)=(s)[i]; \
if(U16_IS_SURROGATE(c)) { \
if(U16_IS_SURROGATE_LEAD(c)) { \
(c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)+1]); \
} else { \
(c)=U16_GET_SUPPLEMENTARY((s)[(i)-1], (c)); \
} \
} \
} UPRV_BLOCK_MACRO_END
/**
* Get a code point from a string at a random-access offset,
* without changing the offset.
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
*
* The offset may point to either the lead or trail surrogate unit
* for a supplementary code point, in which case the macro will read
* the adjacent matching surrogate as well.
*
* The length can be negative for a NUL-terminated string.
*
* If the offset points to a single, unpaired surrogate, then
* c is set to that unpaired surrogate.
* Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
*
* @param s const UChar * string
* @param start starting string offset (usually 0)
* @param i string offset, must be start<=i<length
* @param length string length
* @param c output UChar32 variable
* @see U16_GET_UNSAFE
* @stable ICU 2.4
*/
#define U16_GET(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
(c)=(s)[i]; \
if(U16_IS_SURROGATE(c)) { \
uint16_t __c2; \
if(U16_IS_SURROGATE_LEAD(c)) { \
if((i)+1!=(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \
(c)=U16_GET_SUPPLEMENTARY((c), __c2); \
} \
} else { \
if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
(c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
} \
} \
} \
} UPRV_BLOCK_MACRO_END
/**
* Get a code point from a string at a random-access offset,
* without changing the offset.
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
*
* The offset may point to either the lead or trail surrogate unit
* for a supplementary code point, in which case the macro will read
* the adjacent matching surrogate as well.
*
* The length can be negative for a NUL-terminated string.
*
* If the offset points to a single, unpaired surrogate, then
* c is set to U+FFFD.
* Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT_OR_FFFD.
*
* @param s const UChar * string
* @param start starting string offset (usually 0)
* @param i string offset, must be start<=i<length
* @param length string length
* @param c output UChar32 variable
* @see U16_GET_UNSAFE
* @stable ICU 60
*/
#define U16_GET_OR_FFFD(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
(c)=(s)[i]; \
if(U16_IS_SURROGATE(c)) { \
uint16_t __c2; \
if(U16_IS_SURROGATE_LEAD(c)) { \
if((i)+1!=(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \
(c)=U16_GET_SUPPLEMENTARY((c), __c2); \
} else { \
(c)=0xfffd; \
} \
} else { \
if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
(c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
} else { \
(c)=0xfffd; \
} \
} \
} \
} UPRV_BLOCK_MACRO_END
/* definitions with forward iteration --------------------------------------- */
/**
* Get a code point from a string at a code point boundary offset,
* and advance the offset to the next code point boundary.
* (Post-incrementing forward iteration.)
* "Unsafe" macro, assumes well-formed UTF-16.
*
* The offset may point to the lead surrogate unit
* for a supplementary code point, in which case the macro will read
* the following trail surrogate as well.
* If the offset points to a trail surrogate, then that itself
* will be returned as the code point.
* The result is undefined if the offset points to a single, unpaired lead surrogate.
*
* @param s const UChar * string
* @param i string offset
* @param c output UChar32 variable
* @see U16_NEXT
* @stable ICU 2.4
*/
#define U16_NEXT_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
(c)=(s)[(i)++]; \
if(U16_IS_LEAD(c)) { \
(c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)++]); \
} \
} UPRV_BLOCK_MACRO_END
/**
* Get a code point from a string at a code point boundary offset,
* and advance the offset to the next code point boundary.
* (Post-incrementing forward iteration.)
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
*
* The length can be negative for a NUL-terminated string.
*
* The offset may point to the lead surrogate unit
* for a supplementary code point, in which case the macro will read
* the following trail surrogate as well.
* If the offset points to a trail surrogate or
* to a single, unpaired lead surrogate, then c is set to that unpaired surrogate.
*
* @param s const UChar * string
* @param i string offset, must be i<length
* @param length string length
* @param c output UChar32 variable
* @see U16_NEXT_UNSAFE
* @stable ICU 2.4
*/
#define U16_NEXT(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
(c)=(s)[(i)++]; \
if(U16_IS_LEAD(c)) { \
uint16_t __c2; \
if((i)!=(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \
++(i); \
(c)=U16_GET_SUPPLEMENTARY((c), __c2); \
} \
} \
} UPRV_BLOCK_MACRO_END
/**
* Get a code point from a string at a code point boundary offset,
* and advance the offset to the next code point boundary.
* (Post-incrementing forward iteration.)
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
*
* The length can be negative for a NUL-terminated string.
*
* The offset may point to the lead surrogate unit
* for a supplementary code point, in which case the macro will read
* the following trail surrogate as well.
* If the offset points to a trail surrogate or
* to a single, unpaired lead surrogate, then c is set to U+FFFD.
*
* @param s const UChar * string
* @param i string offset, must be i<length
* @param length string length
* @param c output UChar32 variable
* @see U16_NEXT_UNSAFE
* @stable ICU 60
*/
#define U16_NEXT_OR_FFFD(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
(c)=(s)[(i)++]; \
if(U16_IS_SURROGATE(c)) { \
uint16_t __c2; \
if(U16_IS_SURROGATE_LEAD(c) && (i)!=(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \
++(i); \
(c)=U16_GET_SUPPLEMENTARY((c), __c2); \
} else { \
(c)=0xfffd; \
} \
} \
} UPRV_BLOCK_MACRO_END
/**
* Append a code point to a string, overwriting 1 or 2 code units.
* The offset points to the current end of the string contents
* and is advanced (post-increment).
* "Unsafe" macro, assumes a valid code point and sufficient space in the string.
* Otherwise, the result is undefined.
*
* @param s UChar * string buffer (written to, so it must not be const)
* @param i string offset
* @param c code point to append
* @see U16_APPEND
* @stable ICU 2.4
*/
#define U16_APPEND_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
if((uint32_t)(c)<=0xffff) { \
(s)[(i)++]=(uint16_t)(c); \
} else { \
(s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
(s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
} \
} UPRV_BLOCK_MACRO_END
/**
* Append a code point to a string, overwriting 1 or 2 code units.
* The offset points to the current end of the string contents
* and is advanced (post-increment).
* "Safe" macro, checks for a valid code point.
* If a surrogate pair is written, checks for sufficient space in the string.
* If the code point is not valid or a trail surrogate does not fit,
* then isError is set to TRUE.
*
* @param s UChar * string buffer (written to, so it must not be const)
* @param i string offset, must be i<capacity
* @param capacity size of the string buffer
* @param c code point to append
* @param isError output UBool set to TRUE if an error occurs, otherwise not modified
* @see U16_APPEND_UNSAFE
* @stable ICU 2.4
*/
#define U16_APPEND(s, i, capacity, c, isError) UPRV_BLOCK_MACRO_BEGIN { \
if((uint32_t)(c)<=0xffff) { \
(s)[(i)++]=(uint16_t)(c); \
} else if((uint32_t)(c)<=0x10ffff && (i)+1<(capacity)) { \
(s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
(s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
} else /* c>0x10ffff or not enough space */ { \
(isError)=TRUE; \
} \
} UPRV_BLOCK_MACRO_END
/**
* Advance the string offset from one code point boundary to the next.
* (Post-incrementing iteration.)
* "Unsafe" macro, assumes well-formed UTF-16.
*
* @param s const UChar * string
* @param i string offset
* @see U16_FWD_1
* @stable ICU 2.4
*/
#define U16_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
if(U16_IS_LEAD((s)[(i)++])) { \
++(i); \
} \
} UPRV_BLOCK_MACRO_END
/**
* Advance the string offset from one code point boundary to the next.
* (Post-incrementing iteration.)
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
*
* The length can be negative for a NUL-terminated string.
*
* @param s const UChar * string
* @param i string offset, must be i<length
* @param length string length
* @see U16_FWD_1_UNSAFE
* @stable ICU 2.4
*/
#define U16_FWD_1(s, i, length) UPRV_BLOCK_MACRO_BEGIN { \
if(U16_IS_LEAD((s)[(i)++]) && (i)!=(length) && U16_IS_TRAIL((s)[i])) { \
++(i); \
} \
} UPRV_BLOCK_MACRO_END
/**
* Advance the string offset from one code point boundary to the n-th next one,
* i.e., move forward by n code points.
* (Post-incrementing iteration.)
* "Unsafe" macro, assumes well-formed UTF-16.
*
* @param s const UChar * string
* @param i string offset
* @param n number of code points to skip
* @see U16_FWD_N
* @stable ICU 2.4
*/
#define U16_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    /* n is evaluated exactly once, into the countdown counter. */ \
    int32_t __N=(n); \
    /* One unchecked code point step per remaining count. */ \
    for(; __N>0; --__N) { \
        U16_FWD_1_UNSAFE(s, i); \
    } \
} UPRV_BLOCK_MACRO_END
/**
* Advance the string offset from one code point boundary to the n-th next one,
* i.e., move forward by n code points.
* (Post-incrementing iteration.)
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
*
* The length can be negative for a NUL-terminated string.
*
* @param s const UChar * string
* @param i int32_t string offset, must be i<length
* @param length int32_t string length
* @param n number of code points to skip
* @see U16_FWD_N_UNSAFE
* @stable ICU 2.4
*/
#define U16_FWD_N(s, i, length, n) UPRV_BLOCK_MACRO_BEGIN { \
    /* n is evaluated exactly once, into the countdown counter. */ \
    int32_t __N=(n); \
    /* Stop early at the end of the text: i reaching length, or, for a negative */ \
    /* length, the unit at i being NUL. */ \
    for(; __N>0 && ((i)<(length) || ((length)<0 && (s)[i]!=0)); --__N) { \
        U16_FWD_1(s, i, length); \
    } \
} UPRV_BLOCK_MACRO_END
/**
* Adjust a random-access offset to a code point boundary
* at the start of a code point.
* If the offset points to the trail surrogate of a surrogate pair,
* then the offset is decremented.
* Otherwise, it is not modified.
* "Unsafe" macro, assumes well-formed UTF-16.
*
* @param s const UChar * string
* @param i string offset
* @see U16_SET_CP_START
* @stable ICU 2.4
*/
#define U16_SET_CP_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
if(U16_IS_TRAIL((s)[i])) { \
--(i); \
} \
} UPRV_BLOCK_MACRO_END
/**
* Adjust a random-access offset to a code point boundary
* at the start of a code point.
* If the offset points to the trail surrogate of a surrogate pair,
* then the offset is decremented.
* Otherwise, it is not modified.
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
*
* @param s const UChar * string
* @param start starting string offset (usually 0)
* @param i string offset, must be start<=i
* @see U16_SET_CP_START_UNSAFE
* @stable ICU 2.4
*/
#define U16_SET_CP_START(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
if(U16_IS_TRAIL((s)[i]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \
--(i); \
} \
} UPRV_BLOCK_MACRO_END
/* definitions with backward iteration -------------------------------------- */
/**
* Move the string offset from one code point boundary to the previous one
* and get the code point between them.
* (Pre-decrementing backward iteration.)
* "Unsafe" macro, assumes well-formed UTF-16.
*
* The input offset may be the same as the string length.
* If the offset is behind a trail surrogate unit
* for a supplementary code point, then the macro will read
* the preceding lead surrogate as well.
* If the offset is behind a lead surrogate, then that itself
* will be returned as the code point.
* The result is undefined if the offset is behind a single, unpaired trail surrogate.
*
* @param s const UChar * string
* @param i string offset
* @param c output UChar32 variable
* @see U16_PREV
* @stable ICU 2.4
*/
#define U16_PREV_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
(c)=(s)[--(i)]; \
if(U16_IS_TRAIL(c)) { \
(c)=U16_GET_SUPPLEMENTARY((s)[--(i)], (c)); \
} \
} UPRV_BLOCK_MACRO_END
/**
* Move the string offset from one code point boundary to the previous one
* and get the code point between them.
* (Pre-decrementing backward iteration.)
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
*
* The input offset may be the same as the string length.
* If the offset is behind a trail surrogate unit
* for a supplementary code point, then the macro will read
* the preceding lead surrogate as well.
* If the offset is behind a lead surrogate or behind a single, unpaired
* trail surrogate, then c is set to that unpaired surrogate.
*
* @param s const UChar * string
* @param start starting string offset (usually 0)
* @param i string offset, must be start<i
* @param c output UChar32 variable
* @see U16_PREV_UNSAFE
* @stable ICU 2.4
*/
#define U16_PREV(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
(c)=(s)[--(i)]; \
if(U16_IS_TRAIL(c)) { \
uint16_t __c2; \
if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
--(i); \
(c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
} \
} \
} UPRV_BLOCK_MACRO_END
/**
* Move the string offset from one code point boundary to the previous one
* and get the code point between them.
* (Pre-decrementing backward iteration.)
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
*
* The input offset may be the same as the string length.
* If the offset is behind a trail surrogate unit
* for a supplementary code point, then the macro will read
* the preceding lead surrogate as well.
* If the offset is behind a lead surrogate or behind a single, unpaired
* trail surrogate, then c is set to U+FFFD.
*
* @param s const UChar * string
* @param start starting string offset (usually 0)
* @param i string offset, must be start<i
* @param c output UChar32 variable
* @see U16_PREV_UNSAFE
* @stable ICU 60
*/
#define U16_PREV_OR_FFFD(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
(c)=(s)[--(i)]; \
if(U16_IS_SURROGATE(c)) { \
uint16_t __c2; \
if(U16_IS_SURROGATE_TRAIL(c) && (i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
--(i); \
(c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
} else { \
(c)=0xfffd; \
} \
} \
} UPRV_BLOCK_MACRO_END
/**
* Move the string offset from one code point boundary to the previous one.
* (Pre-decrementing backward iteration.)
* The input offset may be the same as the string length.
* "Unsafe" macro, assumes well-formed UTF-16.
*
* @param s const UChar * string
* @param i string offset
* @see U16_BACK_1
* @stable ICU 2.4
*/
#define U16_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
if(U16_IS_TRAIL((s)[--(i)])) { \
--(i); \
} \
} UPRV_BLOCK_MACRO_END
/**
* Move the string offset from one code point boundary to the previous one.
* (Pre-decrementing backward iteration.)
* The input offset may be the same as the string length.
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
*
* @param s const UChar * string
* @param start starting string offset (usually 0)
* @param i string offset, must be start<i
* @see U16_BACK_1_UNSAFE
* @stable ICU 2.4
*/
#define U16_BACK_1(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
if(U16_IS_TRAIL((s)[--(i)]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \
--(i); \
} \
} UPRV_BLOCK_MACRO_END
/**
* Move the string offset from one code point boundary to the n-th one before it,
* i.e., move backward by n code points.
* (Pre-decrementing backward iteration.)
* The input offset may be the same as the string length.
* "Unsafe" macro, assumes well-formed UTF-16.
*
* @param s const UChar * string
* @param i string offset
* @param n number of code points to skip
* @see U16_BACK_N
* @stable ICU 2.4
*/
#define U16_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t _u16_left; \
    /* n is evaluated exactly once; non-positive n performs no steps */ \
    for(_u16_left=(n); _u16_left>0; --_u16_left) { \
        U16_BACK_1_UNSAFE(s, i); \
    } \
} UPRV_BLOCK_MACRO_END
/**
* Move the string offset from one code point boundary to the n-th one before it,
* i.e., move backward by n code points.
* (Pre-decrementing backward iteration.)
* The input offset may be the same as the string length.
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
*
* @param s const UChar * string
* @param start start of string
* @param i string offset, must be start<i
* @param n number of code points to skip
* @see U16_BACK_N_UNSAFE
* @stable ICU 2.4
*/
#define U16_BACK_N(s, start, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t _u16_left; \
    /* stop early once the offset reaches the start of the string */ \
    for(_u16_left=(n); _u16_left>0 && (i)>(start); --_u16_left) { \
        U16_BACK_1(s, start, i); \
    } \
} UPRV_BLOCK_MACRO_END
/**
* Adjust a random-access offset to a code point boundary after a code point.
* If the offset is behind the lead surrogate of a surrogate pair,
* then the offset is incremented.
* Otherwise, it is not modified.
* The input offset may be the same as the string length.
* "Unsafe" macro, assumes well-formed UTF-16.
*
* @param s const UChar * string
* @param i string offset
* @see U16_SET_CP_LIMIT
* @stable ICU 2.4
*/
#define U16_SET_CP_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    /* an offset right after a lead surrogate splits a pair: move past the trail */ \
    (i)+=(U16_IS_LEAD((s)[(i)-1]) ? 1 : 0); \
} UPRV_BLOCK_MACRO_END
/**
* Adjust a random-access offset to a code point boundary after a code point.
* If the offset is behind the lead surrogate of a surrogate pair,
* then the offset is incremented.
* Otherwise, it is not modified.
* The input offset may be the same as the string length.
* "Safe" macro, handles unpaired surrogates and checks for string boundaries.
*
* The length can be negative for a NUL-terminated string.
*
* @param s const UChar * string
* @param start int32_t starting string offset (usually 0)
* @param i int32_t string offset, start<=i<=length
* @param length int32_t string length
* @see U16_SET_CP_LIMIT_UNSAFE
* @stable ICU 2.4
*/
#define U16_SET_CP_LIMIT(s, start, i, length) UPRV_BLOCK_MACRO_BEGIN { \
    /* nothing to adjust at the very start or at the very end of the string */ \
    if((i)>(start)) { \
        if((length)<0 || (i)<(length)) { \
            /* the offset sits between a lead and its trail: move past the trail */ \
            if(U16_IS_LEAD((s)[(i)-1]) && U16_IS_TRAIL((s)[i])) { \
                ++(i); \
            } \
        } \
    } \
} UPRV_BLOCK_MACRO_END
#endif

View File

@ -0,0 +1,881 @@
// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*
* Copyright (C) 1999-2015, International Business Machines
* Corporation and others. All Rights Reserved.
*
*******************************************************************************
* file name: utf8.h
* encoding: UTF-8
* tab size: 8 (not used)
* indentation:4
*
* created on: 1999sep13
* created by: Markus W. Scherer
*/
/**
* \file
* \brief C API: 8-bit Unicode handling macros
*
* This file defines macros to deal with 8-bit Unicode (UTF-8) code units (bytes) and strings.
*
* For more information see utf.h and the ICU User Guide Strings chapter
* (http://userguide.icu-project.org/strings).
*
* <em>Usage:</em>
* ICU coding guidelines for if() statements should be followed when using these macros.
* Compound statements (curly braces {}) must be used for if-else-while...
* bodies and all macro statements should be terminated with semicolon.
*/
#ifndef __UTF8_H__
#define __UTF8_H__
#include "unicode/umachine.h"
#ifndef __UTF_H__
# include "unicode/utf.h"
#endif
/* internal definitions ----------------------------------------------------- */
/**
* Counts the trail bytes for a UTF-8 lead byte.
* Returns 0 for 0..0xc1 as well as for 0xf5..0xff.
* leadByte might be evaluated multiple times.
*
* This is internal since it is not meant to be called directly by external clients;
* however it is called by public macros in this file and thus must remain stable.
*
* @param leadByte The first byte of a UTF-8 sequence. Must be 0..0xff.
* @internal
*/
#define U8_COUNT_TRAIL_BYTES(leadByte) \
    (!U8_IS_LEAD(leadByte) ? 0 : \
        1+((uint8_t)(leadByte)>=0xe0)+((uint8_t)(leadByte)>=0xf0))
/**
* Counts the trail bytes for a UTF-8 lead byte of a valid UTF-8 sequence.
* Returns 0 for 0..0xc1. Undefined for 0xf5..0xff.
* leadByte might be evaluated multiple times.
*
* This is internal since it is not meant to be called directly by external clients;
* however it is called by public macros in this file and thus must remain stable.
*
* @param leadByte The first byte of a UTF-8 sequence. Must be 0..0xff.
* @internal
*/
#define U8_COUNT_TRAIL_BYTES_UNSAFE(leadByte) \
    ((uint8_t)(leadByte)<0xc2 ? 0 : \
        ((uint8_t)(leadByte)<0xe0 ? 1 : \
            ((uint8_t)(leadByte)<0xf0 ? 2 : 3)))
/**
* Mask a UTF-8 lead byte, leave only the lower bits that form part of the code point value.
*
* This is internal since it is not meant to be called directly by external clients;
* however it is called by public macros in this file and thus must remain stable.
* @internal
*/
/*
 * Keeps only the low (6 - countTrailBytes) bits of leadByte and stores the
 * result back into leadByte, so leadByte must be a modifiable lvalue.
 * The mask is 0x1f for 1 trail byte, 0x0f for 2 and 0x07 for 3.
 * The value of the whole expression is the masked byte.
 */
#define U8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1)
/**
* Internal bit vector for 3-byte UTF-8 validity check, for use in U8_IS_VALID_LEAD3_AND_T1.
* Each bit indicates whether one lead byte + first trail byte pair starts a valid sequence.
* Lead byte E0..EF bits 3..0 are used as byte index,
* first trail byte bits 7..5 are used as bit index into that byte.
* @see U8_IS_VALID_LEAD3_AND_T1
* @internal
*/
/*
 * One table byte per lead byte E0..EF (indexed by the lead's low nibble).
 * Bit 4 accepts first trail bytes 80..9F, bit 5 accepts A0..BF:
 *   E0     -> 0x20: only A0..BF (80..9F would be an overlong encoding)
 *   ED     -> 0x10: only 80..9F (A0..BF would encode a surrogate)
 *   others -> 0x30: the full trail range 80..BF
 */
#define U8_LEAD3_T1_BITS "\x20\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x10\x30\x30"
/**
* Internal 3-byte UTF-8 validity check.
* Non-zero if lead byte E0..EF and first trail byte 00..FF start a valid sequence.
* @internal
*/
#define U8_IS_VALID_LEAD3_AND_T1(lead, t1) ((1<<((uint8_t)(t1)>>5))&U8_LEAD3_T1_BITS[(lead)&0xf])
/**
* Internal bit vector for 4-byte UTF-8 validity check, for use in U8_IS_VALID_LEAD4_AND_T1.
* Each bit indicates whether one lead byte + first trail byte pair starts a valid sequence.
* First trail byte bits 7..4 are used as byte index,
* lead byte F0..F4 bits 2..0 are used as bit index into that byte.
* @see U8_IS_VALID_LEAD4_AND_T1
* @internal
*/
/*
 * One table byte per high nibble of the first trail byte; bit n stands for
 * lead byte F0+n:
 *   trail 80..8F -> 0x1E: leads F1..F4 (F0 80..8F would be overlong)
 *   trail 90..BF -> 0x0F: leads F0..F3 (F4 90..BF would exceed U+10FFFF)
 *   anything else -> 0x00: not a trail byte
 */
#define U8_LEAD4_T1_BITS "\x00\x00\x00\x00\x00\x00\x00\x00\x1E\x0F\x0F\x0F\x00\x00\x00\x00"
/**
* Internal 4-byte UTF-8 validity check.
* Non-zero if lead byte F0..F4 and first trail byte 00..FF start a valid sequence.
* @internal
*/
#define U8_IS_VALID_LEAD4_AND_T1(lead, t1) ((1<<((lead)&7))&U8_LEAD4_T1_BITS[(uint8_t)(t1)>>4])
/**
* Function for handling "next code point" with error-checking.
*
* This is internal since it is not meant to be called directly by external clients;
* however it is U_STABLE (not U_INTERNAL) since it is called by public macros in this
* file and thus must remain stable, and should not be hidden when other internal
* functions are hidden (otherwise public macros would fail to compile).
* @internal
*/
U_STABLE UChar32 U_EXPORT2
utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c, UBool strict);
/**
* Function for handling "append code point" with error-checking.
*
* This is internal since it is not meant to be called directly by external clients;
* however it is U_STABLE (not U_INTERNAL) since it is called by public macros in this
* file and thus must remain stable, and should not be hidden when other internal
* functions are hidden (otherwise public macros would fail to compile).
* @internal
*/
U_STABLE int32_t U_EXPORT2
utf8_appendCharSafeBody(uint8_t *s, int32_t i, int32_t length, UChar32 c, UBool *pIsError);
/**
* Function for handling "previous code point" with error-checking.
*
* This is internal since it is not meant to be called directly by external clients;
* however it is U_STABLE (not U_INTERNAL) since it is called by public macros in this
* file and thus must remain stable, and should not be hidden when other internal
* functions are hidden (otherwise public macros would fail to compile).
* @internal
*/
U_STABLE UChar32 U_EXPORT2
utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, UBool strict);
/**
* Function for handling "skip backward one code point" with error-checking.
*
* This is internal since it is not meant to be called directly by external clients;
* however it is U_STABLE (not U_INTERNAL) since it is called by public macros in this
* file and thus must remain stable, and should not be hidden when other internal
* functions are hidden (otherwise public macros would fail to compile).
* @internal
*/
U_STABLE int32_t U_EXPORT2
utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
/* single-code point definitions -------------------------------------------- */
/**
* Does this code unit (byte) encode a code point by itself (US-ASCII 0..0x7f)?
* @param c 8-bit code unit (byte)
* @return TRUE or FALSE
* @stable ICU 2.4
*/
#define U8_IS_SINGLE(c) (!((c)&0x80))
/**
* Is this code unit (byte) a UTF-8 lead byte? (0xC2..0xF4)
* @param c 8-bit code unit (byte)
* @return TRUE or FALSE
* @stable ICU 2.4
*/
#define U8_IS_LEAD(c) (0x32>=(uint8_t)((c)-0xc2))
/* 0x32 is 0xf4-0xc2: one unsigned comparison covers the whole range 0xC2..0xF4 */
/**
* Is this code unit (byte) a UTF-8 trail byte? (0x80..0xBF)
* @param c 8-bit code unit (byte)
* @return TRUE or FALSE
* @stable ICU 2.4
*/
#define U8_IS_TRAIL(c) (-0x40>(int8_t)(c))
/**
* How many code units (bytes) are used for the UTF-8 encoding
* of this Unicode code point?
* @param c 32-bit code point
* @return 1..4, or 0 if c is a surrogate or not a Unicode code point
* @stable ICU 2.4
*/
#define U8_LENGTH(c) \
    ((uint32_t)(c)>0x10ffff ? 0 : /* beyond the Unicode range */ \
     ((uint32_t)(c)>0xffff ? 4 : \
      ((uint32_t)(c)>0xdfff ? 3 : \
       ((uint32_t)(c)>0xd7ff ? 0 : /* surrogate code points */ \
        ((uint32_t)(c)>0x7ff ? 3 : \
         ((uint32_t)(c)>0x7f ? 2 : 1))))))
/**
* The maximum number of UTF-8 code units (bytes) per Unicode code point (U+0000..U+10ffff).
* @return 4
* @stable ICU 2.4
*/
/* 4 is the largest value U8_LENGTH yields (supplementary code points U+10000..U+10FFFF). */
#define U8_MAX_LENGTH 4
/**
* Get a code point from a string at a random-access offset,
* without changing the offset.
* The offset may point to either the lead byte or one of the trail bytes
* for a code point, in which case the macro will read all of the bytes
* for the code point.
* The result is undefined if the offset points to an illegal UTF-8
* byte sequence.
* Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT.
*
* @param s const uint8_t * string
* @param i string offset
* @param c output UChar32 variable
* @see U8_GET
* @stable ICU 2.4
*/
#define U8_GET_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    /* work on a private copy so that the caller's offset is left untouched */ \
    int32_t _u8_get_unsafe_pos; \
    _u8_get_unsafe_pos=(int32_t)(i); \
    /* back up to the lead byte, then decode forward from there */ \
    U8_SET_CP_START_UNSAFE(s, _u8_get_unsafe_pos); \
    U8_NEXT_UNSAFE(s, _u8_get_unsafe_pos, c); \
} UPRV_BLOCK_MACRO_END
/**
* Get a code point from a string at a random-access offset,
* without changing the offset.
* The offset may point to either the lead byte or one of the trail bytes
* for a code point, in which case the macro will read all of the bytes
* for the code point.
*
* The length can be negative for a NUL-terminated string.
*
* If the offset points to an illegal UTF-8 byte sequence, then
* c is set to a negative value.
* Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT.
*
* @param s const uint8_t * string
* @param start int32_t starting string offset
* @param i int32_t string offset, must be start<=i<length
* @param length int32_t string length
* @param c output UChar32 variable, set to <0 in case of an error
* @see U8_GET_UNSAFE
* @stable ICU 2.4
*/
#define U8_GET(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    /* work on a private copy so that the caller's offset is left untouched */ \
    int32_t _u8_get_pos; \
    _u8_get_pos=(i); \
    /* back up to the lead byte (never before start), then decode with checks */ \
    U8_SET_CP_START(s, start, _u8_get_pos); \
    U8_NEXT(s, _u8_get_pos, length, c); \
} UPRV_BLOCK_MACRO_END
/**
* Get a code point from a string at a random-access offset,
* without changing the offset.
* The offset may point to either the lead byte or one of the trail bytes
* for a code point, in which case the macro will read all of the bytes
* for the code point.
*
* The length can be negative for a NUL-terminated string.
*
* If the offset points to an illegal UTF-8 byte sequence, then
* c is set to U+FFFD.
* Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT_OR_FFFD.
*
* This macro does not distinguish between a real U+FFFD in the text
* and U+FFFD returned for an ill-formed sequence.
* Use U8_GET() if that distinction is important.
*
* @param s const uint8_t * string
* @param start int32_t starting string offset
* @param i int32_t string offset, must be start<=i<length
* @param length int32_t string length
* @param c output UChar32 variable, set to U+FFFD in case of an error
* @see U8_GET
* @stable ICU 51
*/
#define U8_GET_OR_FFFD(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
    /* work on a private copy so that the caller's offset is left untouched */ \
    int32_t _u8_get_pos; \
    _u8_get_pos=(i); \
    /* back up to the lead byte (never before start), then decode with checks */ \
    U8_SET_CP_START(s, start, _u8_get_pos); \
    U8_NEXT_OR_FFFD(s, _u8_get_pos, length, c); \
} UPRV_BLOCK_MACRO_END
/* definitions with forward iteration --------------------------------------- */
/**
* Get a code point from a string at a code point boundary offset,
* and advance the offset to the next code point boundary.
* (Post-incrementing forward iteration.)
* "Unsafe" macro, assumes well-formed UTF-8.
*
* The offset may point to the lead byte of a multi-byte sequence,
* in which case the macro will read the whole sequence.
* The result is undefined if the offset points to a trail byte
* or an illegal UTF-8 sequence.
*
* @param s const uint8_t * string
* @param i string offset
* @param c output UChar32 variable
* @see U8_NEXT
* @stable ICU 2.4
*/
/*
 * Implementation notes:
 * - The first byte is always consumed; ASCII bytes are returned as-is.
 * - Lead < 0xE0: 2-byte form, 5 payload bits from the lead and 6 from the trail.
 * - Lead < 0xF0: 3-byte form; the lead is shifted unmasked and the cast to
 *   UChar discards the bits above bit 15.
 * - Otherwise: 4-byte form, 3 payload bits from the lead and 6 from each trail.
 * No byte is validated and no bounds are checked.
 */
#define U8_NEXT_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
(c)=(uint8_t)(s)[(i)++]; \
if(!U8_IS_SINGLE(c)) { \
if((c)<0xe0) { \
(c)=(((c)&0x1f)<<6)|((s)[(i)++]&0x3f); \
} else if((c)<0xf0) { \
/* no need for (c&0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ \
(c)=(UChar)(((c)<<12)|(((s)[i]&0x3f)<<6)|((s)[(i)+1]&0x3f)); \
(i)+=2; \
} else { \
(c)=(((c)&7)<<18)|(((s)[i]&0x3f)<<12)|(((s)[(i)+1]&0x3f)<<6)|((s)[(i)+2]&0x3f); \
(i)+=3; \
} \
} \
} UPRV_BLOCK_MACRO_END
/**
* Get a code point from a string at a code point boundary offset,
* and advance the offset to the next code point boundary.
* (Post-incrementing forward iteration.)
* "Safe" macro, checks for illegal sequences and for string boundaries.
*
* The length can be negative for a NUL-terminated string.
*
* The offset may point to the lead byte of a multi-byte sequence,
* in which case the macro will read the whole sequence.
* If the offset points to a trail byte or an illegal UTF-8 sequence, then
* c is set to a negative value.
*
* @param s const uint8_t * string
* @param i int32_t string offset, must be i<length
* @param length int32_t string length
* @param c output UChar32 variable, set to <0 in case of an error
* @see U8_NEXT_UNSAFE
* @stable ICU 2.4
*/
/* Delegates to U8_INTERNAL_NEXT_OR_SUB with U_SENTINEL as the value assigned to c for ill-formed input. */
#define U8_NEXT(s, i, length, c) U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, U_SENTINEL)
/**
* Get a code point from a string at a code point boundary offset,
* and advance the offset to the next code point boundary.
* (Post-incrementing forward iteration.)
* "Safe" macro, checks for illegal sequences and for string boundaries.
*
* The length can be negative for a NUL-terminated string.
*
* The offset may point to the lead byte of a multi-byte sequence,
* in which case the macro will read the whole sequence.
* If the offset points to a trail byte or an illegal UTF-8 sequence, then
* c is set to U+FFFD.
*
* This macro does not distinguish between a real U+FFFD in the text
* and U+FFFD returned for an ill-formed sequence.
* Use U8_NEXT() if that distinction is important.
*
* @param s const uint8_t * string
* @param i int32_t string offset, must be i<length
* @param length int32_t string length
* @param c output UChar32 variable, set to U+FFFD in case of an error
* @see U8_NEXT
* @stable ICU 51
*/
/* Delegates to U8_INTERNAL_NEXT_OR_SUB with 0xfffd as the value assigned to c for ill-formed input. */
#define U8_NEXT_OR_FFFD(s, i, length, c) U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, 0xfffd)
/**
 * Shared body of U8_NEXT and U8_NEXT_OR_FFFD: decodes one code point at s[i]
 * and assigns `sub` to c when the sequence is ill-formed.
 *
 * How the single condition works:
 * - The first byte is always consumed ((i)++). ASCII bytes are returned as-is.
 * - Each later byte is read only after an (i)!=(length) test. Because the test
 *   is for inequality, a negative length never matches a non-negative offset;
 *   a NUL terminator then ends the sequence because it fails the trail-byte
 *   checks.
 * - 3-byte leads (E0..EF) validate the first trail byte through
 *   U8_LEAD3_T1_BITS; 4-byte leads (F0..F4) through U8_LEAD4_T1_BITS;
 *   2-byte leads must be at least 0xC2.
 * - Remaining trail bytes are checked with (byte-0x80)<=0x3f, stored in the
 *   uint8_t __t so that bytes below 0x80 wrap around and fail the comparison.
 * - c accumulates the payload bits as each byte is accepted.
 *
 * On failure, i is left after the bytes that were accepted, i.e. at the byte
 * that broke the sequence (or at length), and c is set to `sub`.
 *
 * @param s const uint8_t * string
 * @param i string offset, advanced past the consumed bytes
 * @param length string length, or negative for NUL-terminated input
 * @param c output variable for the code point or `sub`
 * @param sub value assigned to c for an ill-formed sequence
 * @internal
 */
#define U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, sub) UPRV_BLOCK_MACRO_BEGIN { \
(c)=(uint8_t)(s)[(i)++]; \
if(!U8_IS_SINGLE(c)) { \
uint8_t __t = 0; \
if((i)!=(length) && \
/* fetch/validate/assemble all but last trail byte */ \
((c)>=0xe0 ? \
((c)<0xf0 ? /* U+0800..U+FFFF except surrogates */ \
U8_LEAD3_T1_BITS[(c)&=0xf]&(1<<((__t=(s)[i])>>5)) && \
(__t&=0x3f, 1) \
: /* U+10000..U+10FFFF */ \
((c)-=0xf0)<=4 && \
U8_LEAD4_T1_BITS[(__t=(s)[i])>>4]&(1<<(c)) && \
((c)=((c)<<6)|(__t&0x3f), ++(i)!=(length)) && \
(__t=(s)[i]-0x80)<=0x3f) && \
/* valid second-to-last trail byte */ \
((c)=((c)<<6)|__t, ++(i)!=(length)) \
: /* U+0080..U+07FF */ \
(c)>=0xc2 && ((c)&=0x1f, 1)) && \
/* last trail byte */ \
(__t=(s)[i]-0x80)<=0x3f && \
((c)=((c)<<6)|__t, ++(i), 1)) { \
} else { \
(c)=(sub); /* ill-formed*/ \
} \
} \
} UPRV_BLOCK_MACRO_END
/**
* Append a code point to a string, overwriting 1 to 4 bytes.
* The offset points to the current end of the string contents
* and is advanced (post-increment).
* "Unsafe" macro, assumes a valid code point and sufficient space in the string.
* Otherwise, the result is undefined.
*
* @param s uint8_t * string buffer (written to, so it must not be const)
* @param i string offset
* @param c code point to append
* @see U8_APPEND
* @stable ICU 2.4
*/
#define U8_APPEND_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    uint32_t _u8_cp=(c); \
    if(_u8_cp<=0x7f) { \
        /* 1 byte: ASCII */ \
        (s)[(i)++]=(uint8_t)_u8_cp; \
    } else if(_u8_cp<=0x7ff) { \
        /* 2 bytes: 110xxxxx 10xxxxxx */ \
        (s)[(i)++]=(uint8_t)((_u8_cp>>6)|0xc0); \
        (s)[(i)++]=(uint8_t)((_u8_cp&0x3f)|0x80); \
    } else if(_u8_cp<=0xffff) { \
        /* 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx */ \
        (s)[(i)++]=(uint8_t)((_u8_cp>>12)|0xe0); \
        (s)[(i)++]=(uint8_t)(((_u8_cp>>6)&0x3f)|0x80); \
        (s)[(i)++]=(uint8_t)((_u8_cp&0x3f)|0x80); \
    } else { \
        /* 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */ \
        (s)[(i)++]=(uint8_t)((_u8_cp>>18)|0xf0); \
        (s)[(i)++]=(uint8_t)(((_u8_cp>>12)&0x3f)|0x80); \
        (s)[(i)++]=(uint8_t)(((_u8_cp>>6)&0x3f)|0x80); \
        (s)[(i)++]=(uint8_t)((_u8_cp&0x3f)|0x80); \
    } \
} UPRV_BLOCK_MACRO_END
/**
* Append a code point to a string, overwriting 1 to 4 bytes.
* The offset points to the current end of the string contents
* and is advanced (post-increment).
* "Safe" macro, checks for a valid code point.
* If a non-ASCII code point is written, checks for sufficient space in the string.
* If the code point is not valid or trail bytes do not fit,
* then isError is set to TRUE.
*
* @param s uint8_t * string buffer (written to, so it must not be const)
* @param i int32_t string offset, must be i<capacity
* @param capacity int32_t size of the string buffer
* @param c UChar32 code point to append
* @param isError output UBool set to TRUE if an error occurs, otherwise not modified
* @see U8_APPEND_UNSAFE
* @stable ICU 2.4
*/
#define U8_APPEND(s, i, capacity, c, isError) UPRV_BLOCK_MACRO_BEGIN { \
    uint32_t _u8_cp=(c); \
    if(0x7f>=_u8_cp) { \
        /* ASCII is stored without a capacity check; the caller guarantees i<capacity */ \
        (s)[(i)++]=(uint8_t)_u8_cp; \
    } else if(0x7ff>=_u8_cp && (capacity)>(i)+1) { \
        (s)[(i)++]=(uint8_t)(0xc0|(_u8_cp>>6)); \
        (s)[(i)++]=(uint8_t)(0x80|(_u8_cp&0x3f)); \
    } else if((0xd7ff>=_u8_cp || (_u8_cp>=0xe000 && 0xffff>=_u8_cp)) && (capacity)>(i)+2) { \
        /* BMP code point, surrogates excluded */ \
        (s)[(i)++]=(uint8_t)(0xe0|(_u8_cp>>12)); \
        (s)[(i)++]=(uint8_t)(0x80|((_u8_cp>>6)&0x3f)); \
        (s)[(i)++]=(uint8_t)(0x80|(_u8_cp&0x3f)); \
    } else if(_u8_cp>0xffff && 0x10ffff>=_u8_cp && (capacity)>(i)+3) { \
        /* supplementary code point */ \
        (s)[(i)++]=(uint8_t)(0xf0|(_u8_cp>>18)); \
        (s)[(i)++]=(uint8_t)(0x80|((_u8_cp>>12)&0x3f)); \
        (s)[(i)++]=(uint8_t)(0x80|((_u8_cp>>6)&0x3f)); \
        (s)[(i)++]=(uint8_t)(0x80|(_u8_cp&0x3f)); \
    } else { \
        /* not a valid code point, or its bytes do not fit: nothing is written */ \
        (isError)=TRUE; \
    } \
} UPRV_BLOCK_MACRO_END
/**
* Advance the string offset from one code point boundary to the next.
* (Post-incrementing iteration.)
* "Unsafe" macro, assumes well-formed UTF-8.
*
* @param s const uint8_t * string
* @param i string offset
* @see U8_FWD_1
* @stable ICU 2.4
*/
#define U8_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    /* the lead byte alone tells how many trail bytes follow */ \
    int32_t _u8_trail=U8_COUNT_TRAIL_BYTES_UNSAFE((s)[i]); \
    (i)+=1+_u8_trail; \
} UPRV_BLOCK_MACRO_END
/**
* Advance the string offset from one code point boundary to the next.
* (Post-incrementing iteration.)
* "Safe" macro, checks for illegal sequences and for string boundaries.
*
* The length can be negative for a NUL-terminated string.
*
* @param s const uint8_t * string
* @param i int32_t string offset, must be i<length
* @param length int32_t string length
* @see U8_FWD_1_UNSAFE
* @stable ICU 2.4
*/
/*
 * Implementation notes:
 * - The first byte is always skipped. Only a lead byte (C2..F4) that is not
 *   the last byte before `length` can pull further bytes along.
 * - Trail bytes are skipped one at a time, and only while each one is valid
 *   for the lead and lies before `length`; the offset therefore stops at the
 *   first byte that does not belong to the sequence.
 * - The boundary tests use !=, so a negative length (NUL-terminated input)
 *   never matches; the NUL byte fails the trail-byte checks instead.
 */
#define U8_FWD_1(s, i, length) UPRV_BLOCK_MACRO_BEGIN { \
uint8_t __b=(s)[(i)++]; \
if(U8_IS_LEAD(__b) && (i)!=(length)) { \
uint8_t __t1=(s)[i]; \
if((0xe0<=__b && __b<0xf0)) { \
if(U8_IS_VALID_LEAD3_AND_T1(__b, __t1) && \
++(i)!=(length) && U8_IS_TRAIL((s)[i])) { \
++(i); \
} \
} else if(__b<0xe0) { \
if(U8_IS_TRAIL(__t1)) { \
++(i); \
} \
} else /* __b>=0xf0 */ { \
if(U8_IS_VALID_LEAD4_AND_T1(__b, __t1) && \
++(i)!=(length) && U8_IS_TRAIL((s)[i]) && \
++(i)!=(length) && U8_IS_TRAIL((s)[i])) { \
++(i); \
} \
} \
} \
} UPRV_BLOCK_MACRO_END
/**
* Advance the string offset from one code point boundary to the n-th next one,
* i.e., move forward by n code points.
* (Post-incrementing iteration.)
* "Unsafe" macro, assumes well-formed UTF-8.
*
* @param s const uint8_t * string
* @param i string offset
* @param n number of code points to skip
* @see U8_FWD_N
* @stable ICU 2.4
*/
#define U8_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t _u8_left; \
    /* n is evaluated exactly once; non-positive n performs no steps */ \
    for(_u8_left=(n); _u8_left>0; --_u8_left) { \
        U8_FWD_1_UNSAFE(s, i); \
    } \
} UPRV_BLOCK_MACRO_END
/**
* Advance the string offset from one code point boundary to the n-th next one,
* i.e., move forward by n code points.
* (Post-incrementing iteration.)
* "Safe" macro, checks for illegal sequences and for string boundaries.
*
* The length can be negative for a NUL-terminated string.
*
* @param s const uint8_t * string
* @param i int32_t string offset, must be i<length
* @param length int32_t string length
* @param n number of code points to skip
* @see U8_FWD_N_UNSAFE
* @stable ICU 2.4
*/
#define U8_FWD_N(s, i, length, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t _u8_left; \
    /* stop at the end of the text: explicit length, or NUL when length<0 */ \
    for(_u8_left=(n); \
        _u8_left>0 && ((i)<(length) || ((length)<0 && (s)[i]!=0)); \
        --_u8_left) { \
        U8_FWD_1(s, i, length); \
    } \
} UPRV_BLOCK_MACRO_END
/**
* Adjust a random-access offset to a code point boundary
* at the start of a code point.
* If the offset points to a UTF-8 trail byte,
* then the offset is moved backward to the corresponding lead byte.
* Otherwise, it is not modified.
* "Unsafe" macro, assumes well-formed UTF-8.
*
* @param s const uint8_t * string
* @param i string offset
* @see U8_SET_CP_START
* @stable ICU 2.4
*/
#define U8_SET_CP_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    /* walk left over trail bytes until a lead or single byte is reached */ \
    for(; U8_IS_TRAIL((s)[i]); --(i)) {} \
} UPRV_BLOCK_MACRO_END
/**
* Adjust a random-access offset to a code point boundary
* at the start of a code point.
* If the offset points to a UTF-8 trail byte,
* then the offset is moved backward to the corresponding lead byte.
* Otherwise, it is not modified.
*
* "Safe" macro, checks for illegal sequences and for string boundaries.
* Unlike U8_TRUNCATE_IF_INCOMPLETE(), this macro always reads s[i].
*
* @param s const uint8_t * string
* @param start int32_t starting string offset (usually 0)
* @param i int32_t string offset, must be start<=i
* @see U8_SET_CP_START_UNSAFE
* @see U8_TRUNCATE_IF_INCOMPLETE
* @stable ICU 2.4
*/
/*
 * s[i] is read unconditionally. Only when it is a trail byte is the offset
 * replaced by the result of utf8_back1SafeBody(s, start, i); lead bytes and
 * single bytes leave the offset unchanged.
 */
#define U8_SET_CP_START(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
if(U8_IS_TRAIL((s)[(i)])) { \
(i)=utf8_back1SafeBody(s, start, (i)); \
} \
} UPRV_BLOCK_MACRO_END
/**
* If the string ends with a UTF-8 byte sequence that is valid so far
* but incomplete, then reduce the length of the string to end before
* the lead byte of that incomplete sequence.
* For example, if the string ends with E1 80, the length is reduced by 2.
*
* In all other cases (the string ends with a complete sequence, or it is not
* possible for any further trail byte to extend the trailing sequence)
* the length remains unchanged.
*
* Useful for processing text split across multiple buffers
* (save the incomplete sequence for later)
* and for optimizing iteration
* (check for string length only once per character).
*
* "Safe" macro, checks for illegal sequences and for string boundaries.
* Unlike U8_SET_CP_START(), this macro never reads s[length].
*
* (In UTF-16, simply check for U16_IS_LEAD(last code unit).)
*
* @param s const uint8_t * string
* @param start int32_t starting string offset (usually 0)
* @param length int32_t string length (usually start<=length)
* @see U8_SET_CP_START
* @stable ICU 61
*/
#define U8_TRUNCATE_IF_INCOMPLETE(s, start, length) UPRV_BLOCK_MACRO_BEGIN { \
    if((length)>(start)) { \
        /* (s) is parenthesized on every use so that an expression argument */ \
        /* such as buf+offset indexes the intended byte. */ \
        uint8_t __b1=(s)[(length)-1]; \
        if(U8_IS_SINGLE(__b1)) { \
            /* common ASCII character */ \
        } else if(U8_IS_LEAD(__b1)) { \
            /* a lone lead byte at the end is always incomplete */ \
            --(length); \
        } else if(U8_IS_TRAIL(__b1) && ((length)-2)>=(start)) { \
            uint8_t __b2=(s)[(length)-2]; \
            if(0xe0<=__b2 && __b2<=0xf4) { \
                /* 3- or 4-byte lead + one trail: incomplete if the pair is valid so far */ \
                if(__b2<0xf0 ? U8_IS_VALID_LEAD3_AND_T1(__b2, __b1) : \
                        U8_IS_VALID_LEAD4_AND_T1(__b2, __b1)) { \
                    (length)-=2; \
                } \
            } else if(U8_IS_TRAIL(__b2) && ((length)-3)>=(start)) { \
                /* two trail bytes: only a 4-byte lead can still be incomplete */ \
                uint8_t __b3=(s)[(length)-3]; \
                if(0xf0<=__b3 && __b3<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(__b3, __b2)) { \
                    (length)-=3; \
                } \
            } \
        } \
    } \
} UPRV_BLOCK_MACRO_END
/* definitions with backward iteration -------------------------------------- */
/**
* Move the string offset from one code point boundary to the previous one
* and get the code point between them.
* (Pre-decrementing backward iteration.)
* "Unsafe" macro, assumes well-formed UTF-8.
*
* The input offset may be the same as the string length.
* If the offset is behind a multi-byte sequence, then the macro will read
* the whole sequence.
* If the offset is behind a lead byte, then that itself
* will be returned as the code point.
* The result is undefined if the offset is behind an illegal UTF-8 sequence.
*
* @param s const uint8_t * string
* @param i string offset
* @param c output UChar32 variable
* @see U8_PREV
* @stable ICU 2.4
*/
/*
 * Implementation notes:
 * - The byte before the offset is always consumed; if it is not a trail byte
 *   it is returned unchanged.
 * - Otherwise the loop walks left. Each further trail byte contributes 6 bits
 *   at an increasing shift (__shift grows by 6, __count by 1).
 * - The first byte >= 0xC0 ends the loop: U8_MASK_LEAD_BYTE strips its length
 *   marker according to the number of trail bytes seen, and its payload bits
 *   become the most significant part of c.
 * - There is no bounds check: the loop only stops at a byte >= 0xC0.
 */
#define U8_PREV_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
(c)=(uint8_t)(s)[--(i)]; \
if(U8_IS_TRAIL(c)) { \
uint8_t __b, __count=1, __shift=6; \
\
/* c is a trail byte */ \
(c)&=0x3f; \
for(;;) { \
__b=(s)[--(i)]; \
if(__b>=0xc0) { \
U8_MASK_LEAD_BYTE(__b, __count); \
(c)|=(UChar32)__b<<__shift; \
break; \
} else { \
(c)|=(UChar32)(__b&0x3f)<<__shift; \
++__count; \
__shift+=6; \
} \
} \
} \
} UPRV_BLOCK_MACRO_END
/**
* Move the string offset from one code point boundary to the previous one
* and get the code point between them.
* (Pre-decrementing backward iteration.)
* "Safe" macro, checks for illegal sequences and for string boundaries.
*
* The input offset may be the same as the string length.
* If the offset is behind a multi-byte sequence, then the macro will read
* the whole sequence.
* If the offset is behind a lead byte, then that itself
* will be returned as the code point.
* If the offset is behind an illegal UTF-8 sequence, then c is set to a negative value.
*
* @param s const uint8_t * string
* @param start int32_t starting string offset (usually 0)
* @param i int32_t string offset, must be start<i
* @param c output UChar32 variable, set to <0 in case of an error
* @see U8_PREV_UNSAFE
* @stable ICU 2.4
*/
#define U8_PREV(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    /* the byte before the offset is always consumed */ \
    (c)=(uint8_t)(s)[--(i)]; \
    if(!U8_IS_SINGLE(c)) { \
        /* (s) is parenthesized so the cast covers a whole expression argument. */ \
        /* NOTE(review): strict=-1 presumably selects the negative error result */ \
        /* documented for this macro - confirm in utf8_prevCharSafeBody. */ \
        (c)=utf8_prevCharSafeBody((const uint8_t *)(s), start, &(i), c, -1); \
    } \
} UPRV_BLOCK_MACRO_END
/**
* Move the string offset from one code point boundary to the previous one
* and get the code point between them.
* (Pre-decrementing backward iteration.)
* "Safe" macro, checks for illegal sequences and for string boundaries.
*
* The input offset may be the same as the string length.
* If the offset is behind a multi-byte sequence, then the macro will read
* the whole sequence.
* If the offset is behind a lead byte, then that itself
* will be returned as the code point.
* If the offset is behind an illegal UTF-8 sequence, then c is set to U+FFFD.
*
* This macro does not distinguish between a real U+FFFD in the text
* and U+FFFD returned for an ill-formed sequence.
* Use U8_PREV() if that distinction is important.
*
* @param s const uint8_t * string
* @param start int32_t starting string offset (usually 0)
* @param i int32_t string offset, must be start<i
* @param c output UChar32 variable, set to U+FFFD in case of an error
* @see U8_PREV
* @stable ICU 51
*/
#define U8_PREV_OR_FFFD(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
    /* the byte before the offset is always consumed */ \
    (c)=(uint8_t)(s)[--(i)]; \
    if(!U8_IS_SINGLE(c)) { \
        /* (s) is parenthesized so the cast covers a whole expression argument. */ \
        /* NOTE(review): strict=-3 presumably selects the U+FFFD error result */ \
        /* documented for this macro - confirm in utf8_prevCharSafeBody. */ \
        (c)=utf8_prevCharSafeBody((const uint8_t *)(s), start, &(i), c, -3); \
    } \
} UPRV_BLOCK_MACRO_END
/**
* Move the string offset from one code point boundary to the previous one.
* (Pre-decrementing backward iteration.)
* The input offset may be the same as the string length.
* "Unsafe" macro, assumes well-formed UTF-8.
*
* @param s const uint8_t * string
* @param i string offset
* @see U8_BACK_1
* @stable ICU 2.4
*/
#define U8_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
    /* step left at least once, and keep going while on a trail byte */ \
    do { \
        --(i); \
    } while(U8_IS_TRAIL((s)[i])); \
} UPRV_BLOCK_MACRO_END
/**
* Move the string offset from one code point boundary to the previous one.
* (Pre-decrementing backward iteration.)
* The input offset may be the same as the string length.
* "Safe" macro, checks for illegal sequences and for string boundaries.
*
* @param s const uint8_t * string
* @param start int32_t starting string offset (usually 0)
* @param i int32_t string offset, must be start<i
* @see U8_BACK_1_UNSAFE
* @stable ICU 2.4
*/
#define U8_BACK_1(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
    /* one byte back is always taken */ \
    --(i); \
    /* on a trail byte, let the checked helper find the real boundary */ \
    if(U8_IS_TRAIL((s)[i])) { \
        (i)=utf8_back1SafeBody(s, start, (i)); \
    } \
} UPRV_BLOCK_MACRO_END
/**
* Move the string offset from one code point boundary to the n-th one before it,
* i.e., move backward by n code points.
* (Pre-decrementing backward iteration.)
* The input offset may be the same as the string length.
* "Unsafe" macro, assumes well-formed UTF-8.
*
* @param s const uint8_t * string
* @param i string offset
* @param n number of code points to skip
* @see U8_BACK_N
* @stable ICU 2.4
*/
#define U8_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t _u8_left; \
    /* n is evaluated exactly once; non-positive n performs no steps */ \
    for(_u8_left=(n); _u8_left>0; --_u8_left) { \
        U8_BACK_1_UNSAFE(s, i); \
    } \
} UPRV_BLOCK_MACRO_END
/**
* Move the string offset from one code point boundary to the n-th one before it,
* i.e., move backward by n code points.
* (Pre-decrementing backward iteration.)
* The input offset may be the same as the string length.
* "Safe" macro, checks for illegal sequences and for string boundaries.
*
* @param s const uint8_t * string
* @param start int32_t index of the start of the string
* @param i int32_t string offset, must be start<i
* @param n number of code points to skip
* @see U8_BACK_N_UNSAFE
* @stable ICU 2.4
*/
#define U8_BACK_N(s, start, i, n) UPRV_BLOCK_MACRO_BEGIN { \
    int32_t _u8_left; \
    /* stop early once the offset reaches the start of the string */ \
    for(_u8_left=(n); _u8_left>0 && (i)>(start); --_u8_left) { \
        U8_BACK_1(s, start, i); \
    } \
} UPRV_BLOCK_MACRO_END
/**
* Adjust a random-access offset to a code point boundary after a code point.
* If the offset is behind a partial multi-byte sequence,
* then the offset is incremented to behind the whole sequence.
* Otherwise, it is not modified.
* The input offset may be the same as the string length.
* "Unsafe" macro, assumes well-formed UTF-8.
*
* @param s const uint8_t * string
* @param i string offset
* @see U8_SET_CP_LIMIT
* @stable ICU 2.4
*/
/*
 * Steps back to the start of the code point that contains the byte before
 * the offset, then forward over that whole code point. An offset that already
 * sits on a boundary ends up where it started.
 */
#define U8_SET_CP_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
U8_BACK_1_UNSAFE(s, i); \
U8_FWD_1_UNSAFE(s, i); \
} UPRV_BLOCK_MACRO_END
/**
* Adjust a random-access offset to a code point boundary after a code point.
* If the offset is behind a partial multi-byte sequence,
* then the offset is incremented to behind the whole sequence.
* Otherwise, it is not modified.
* The input offset may be the same as the string length.
* "Safe" macro, checks for illegal sequences and for string boundaries.
*
* The length can be negative for a NUL-terminated string.
*
* @param s const uint8_t * string
* @param start int32_t starting string offset (usually 0)
* @param i int32_t string offset, must be start<=i<=length
* @param length int32_t string length
* @see U8_SET_CP_LIMIT_UNSAFE
* @stable ICU 2.4
*/
#define U8_SET_CP_LIMIT(s, start, i, length) UPRV_BLOCK_MACRO_BEGIN { \
    /* offsets at the very start or the very end are boundaries already */ \
    if((i)>(start)) { \
        if((length)<0 || (i)<(length)) { \
            /* back to the start of the previous code point, then over it */ \
            U8_BACK_1(s, start, i); \
            U8_FWD_1(s, i, length); \
        } \
    } \
} UPRV_BLOCK_MACRO_END
#endif

View File

@ -0,0 +1,24 @@
"calloc",
"free",
"iswalnum",
"iswalpha",
"iswblank",
"iswdigit",
"iswlower",
"iswspace",
"iswupper",
"iswxdigit",
"malloc",
"memchr",
"memcmp",
"memcpy",
"memmove",
"memset",
"realloc",
"strcmp",
"strlen",
"strncat",
"strncmp",
"strncpy",
"towlower",
"towupper",

View File

@ -0,0 +1,113 @@
// This file implements a very simple allocator for external scanners running
// in WASM. Allocation is just bumping a static pointer and growing the heap
// as needed, and freeing is mostly a noop. But in the special case of freeing
// the last-allocated pointer, we'll reuse that pointer again.
#ifdef TREE_SITTER_FEATURE_WASM
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
// Declared for use by this file; nothing in this file currently calls it.
extern void tree_sitter_debug_message(const char *, size_t);
// Bytes per page (64 KiB): get_heap_end multiplies the page count reported by
// __builtin_wasm_memory_size by this, and grow_heap requests memory in
// multiples of it.
#define PAGESIZE 0x10000
// When malloc has to grow memory, it refuses if the new region would end
// more than this many bytes (4 MiB) past heap_start.
#define MAX_HEAP_SIZE (4 * 1024 * 1024)
// Header stored directly in front of every allocation's payload.
typedef struct {
// Payload size in bytes, as passed to malloc.
size_t size;
// First byte of the payload; zero-length so the header itself carries no
// payload storage.
char data[0];
} Region;
// End of memory as last reported by get_heap_end (refreshed in reset_heap and
// after a successful grow in malloc).
static Region *heap_end = NULL;
// Base of the heap, set by reset_heap.
static Region *heap_start = NULL;
// Where the header of the next allocation will be written.
static Region *next = NULL;
// Step back from a payload pointer to the Region header that sits
// directly in front of it.
static inline Region *region_for_ptr(void *ptr) {
  Region *payload_as_region = (Region *)ptr;
  return payload_as_region - 1;
}
// Compute where the region following `self` would start if `self` held
// `len` payload bytes: the end of that payload, rounded up to the next
// multiple of four.
static inline Region *region_after(Region *self, size_t len) {
  char *payload_end = self->data + len;
  uintptr_t rounded = (uintptr_t)(payload_end + 3) & ~0x3;
  return (Region *)(char *)rounded;
}
// Address just past the end of memory 0: its current size in pages
// multiplied by PAGESIZE.
static void *get_heap_end() {
  size_t page_count = __builtin_wasm_memory_size(0);
  return (void *)(page_count * PAGESIZE);
}
// Grow memory 0 by enough whole pages to hold `size` additional bytes.
// Returns nonzero on success and 0 if the memory could not be grown.
static int grow_heap(size_t size) {
  // `size - 1` would wrap around to SIZE_MAX for a zero-byte request and ask
  // for an impossible number of pages. The caller only gets here when it has
  // run out of room, so a zero-byte request still needs one page.
  size_t new_page_count = (size == 0) ? 1 : ((size - 1) / PAGESIZE) + 1;
  // memory.grow reports failure as -1, i.e. SIZE_MAX as an unsigned value.
  return __builtin_wasm_memory_grow(0, new_page_count) != SIZE_MAX;
}
// Clear out the heap, and move it to the given address.
void reset_heap(void *new_heap_start) {
heap_start = new_heap_start;
next = new_heap_start;
heap_end = get_heap_end();
}
// Bump-allocate `size` bytes at `next`.
//
// Writes a Region header at `next`, returns a pointer to the payload that
// follows it, and advances `next` to the 4-byte-aligned end of the payload.
// Grows memory when the new region would extend past `heap_end`.
//
// Returns NULL when the request is too large to address, when growing would
// place the end of the region more than MAX_HEAP_SIZE bytes past
// `heap_start`, or when the memory cannot be grown. `next` is left unchanged
// on failure.
void *malloc(size_t size) {
  // Reject sizes for which `next->data + size + 3` would wrap around the
  // address space; otherwise the wrapped end address would compare below
  // `heap_end` and the bounds check below would be bypassed.
  if (size > UINTPTR_MAX - 3 - (uintptr_t)next->data) return NULL;

  Region *region_end = region_after(next, size);
  if (region_end > heap_end) {
    if ((char *)region_end - (char *)heap_start > MAX_HEAP_SIZE) {
      return NULL;
    }
    // Grow by the actual shortfall. It includes the Region header and the
    // alignment padding, which `size` alone does not: e.g. a request of
    // exactly one page at a full heap needs slightly more than one page.
    size_t shortfall = (size_t)((char *)region_end - (char *)heap_end);
    if (!grow_heap(shortfall)) return NULL;
    heap_end = get_heap_end();
  }
  void *result = &next->data;
  next->size = size;
  next = region_end;
  return result;
}
// Release `ptr`. This is a no-op except when `ptr` is the most recent
// allocation, in which case its space is handed back by rewinding `next`.
// NULL is ignored.
void free(void *ptr) {
  if (ptr != NULL) {
    Region *freed = region_for_ptr(ptr);
    // The freed region is the last one exactly when the region that would
    // follow it is the current bump position.
    if (region_after(freed, freed->size) == next) {
      next = freed;
    }
  }
}
// Allocate zero-initialized storage for `count` elements of `size` bytes.
// Returns NULL if `count * size` overflows size_t or if malloc fails.
void *calloc(size_t count, size_t size) {
  // Guard the multiplication: a wrapped product would allocate a block far
  // smaller than the caller believes it received.
  if (size != 0 && count > SIZE_MAX / size) return NULL;
  size_t total = count * size;
  void *result = malloc(total);
  // Do not clear through a NULL pointer when the allocation failed.
  if (result == NULL) return NULL;
  memset(result, 0, total);
  return result;
}
// Resize the allocation at `ptr` to `new_size` bytes.
//
// A NULL `ptr` behaves like malloc(new_size). If `ptr` is the most recent
// allocation it is resized in place and the same pointer is returned.
// Otherwise a new block is allocated and the old contents are copied into
// it, truncated to `new_size` when shrinking.
//
// Returns NULL on allocation failure, in which case the original block is
// left allocated and unchanged.
void *realloc(void *ptr, size_t new_size) {
  if (ptr == NULL) {
    return malloc(new_size);
  }
  Region *region = region_for_ptr(ptr);
  Region *region_end = region_after(region, region->size);
  // When reallocating the last allocated region, return
  // the same pointer, and skip copying the data.
  if (region_end == next) {
    next = region;
    void *resized = malloc(new_size);
    // malloc leaves `next` untouched when it fails. Move it back past the
    // old region so the caller's block is not silently handed out again.
    if (resized == NULL) next = region_end;
    return resized;
  }
  void *result = malloc(new_size);
  if (result == NULL) return NULL;
  // Copy only what fits: when shrinking, copying the full old size would
  // write past the end of the new block.
  size_t copy_size = region->size < new_size ? region->size : new_size;
  memcpy(result, &region->data, copy_size);
  return result;
}
#endif

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,31 @@
// Prototypes for the ts_wasm_store_* and ts_wasm_language_* functions.
// Only declarations appear here; the definitions live outside this header.
// NOTE(review): the grouping comments below are inferred from the function
// names alone — confirm against the implementation.
#ifndef TREE_SITTER_WASM_H_
#define TREE_SITTER_WASM_H_
#ifdef __cplusplus
extern "C" {
#endif
#include "tree_sitter/api.h"
#include "./parser.h"
// Store setup, reset, and error query.
bool ts_wasm_store_start(TSWasmStore *self, TSLexer *lexer, const TSLanguage *language);
void ts_wasm_store_reset(TSWasmStore *self);
bool ts_wasm_store_has_error(const TSWasmStore *self);
// Lex calls, each taking a parse state id.
bool ts_wasm_store_call_lex_main(TSWasmStore *self, TSStateId state);
bool ts_wasm_store_call_lex_keyword(TSWasmStore *self, TSStateId state);
// Scanner calls. The scanner is identified by a uint32_t value
// (presumably an address inside the store's memory — TODO confirm).
uint32_t ts_wasm_store_call_scanner_create(TSWasmStore *self);
void ts_wasm_store_call_scanner_destroy(TSWasmStore *self, uint32_t scanner_address);
bool ts_wasm_store_call_scanner_scan(TSWasmStore *self, uint32_t scanner_address, uint32_t valid_tokens_ix);
uint32_t ts_wasm_store_call_scanner_serialize(TSWasmStore *self, uint32_t scanner_address, char *buffer);
void ts_wasm_store_call_scanner_deserialize(TSWasmStore *self, uint32_t scanner, const char *buffer, unsigned length);
// Retain/release pair for a language (presumably reference counting — TODO confirm).
void ts_wasm_language_retain(const TSLanguage *self);
void ts_wasm_language_release(const TSLanguage *self);
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_WASM_H_

View File

@ -0,0 +1,53 @@
void Foo::foo(
int a,
float b,
);
void Foo::bar();
void func() {}
void func2();
void func2() {}
struct Foo1 {
};
typedef struct Foo2 Bar1;
typedef struct Foo3
{
} Bar2;
typedef enum Enum
{
} EnumName;
enum Enum
{
}
typedef enum Enum {
Foo;
} Enum;
class Foo
{
Foo();
~Foo();
public:
void foo();
private:
void bar();
}
Foo::Foo() {}
Foo::~Foo() {}

View File

@ -0,0 +1,37 @@
import { Foo } from "@test"
type Type = {
foo: number
}
const bar = require("somelib")
const lit_str: string = "Hello"
let lit_num: number = 5.314;
const arrow_proc = async (
arg1: number
): Promise<number> => {
return number
}
function normal_function(arg: string) {
return "Foobar"
}
class MyClass
{
bar: number;
constructor() {
console.log("QQQ!!!")
Foo.some_proc()
normal_function();
const myT: Type;
}
proc() {
}
}

View File

@ -0,0 +1,53 @@
#import "Basic";
Basic :: #import "Basic";
Options :: struct
{
foo: int;
}
Enum :: enum
{
Test;
Inavlid;
}
main :: () -> void
{
options1: Options;
options2 := Options.{
foo = bar
};
foo :: (foo: int) {
}
array_lit1: [..]string;
array_lit2 := string.["hello", "there\n"];
lit_str := "string";
lit_int := 5555;
lit_float := 6.28;
lit_bool := float;
lit_char := #char "c";
if cnd {
}
if cnd2 == {
case 5;
case .Bar;
case;
}
for i: 0..5 { continue; break; }
for arr {}
proc_call();
Basic.print();
return;
}

View File

@ -12,6 +12,7 @@ patterns = {
};
blacklist_patterns = {
".*",
"non-source",
};
load_paths_base = {
{ ".", .relative = true, .recursive = true, },
@ -27,9 +28,9 @@ commands = {
.out = "*compilation*",
.footer_panel = true,
.save_dirty_files = true,
.win = "code\\bin\\build.bat",
.linux = "./code/bin/package.sh",
.mac = "code/bin/package-mac.sh", },
.win = "bash build_new\\scripts\\build.sh",
.linux = "build_new/scripts/build.sh",
.mac = "build_new/scripts/build.sh", },
.run = { .out = "*run*", .footer_panel = false, .save_dirty_files = false,
.win = "build\\4ed.exe",
.linux = "build/4ed",