// 4coder/test/4cpp_new_lexer.h
// 251 lines, 6.7 KiB, C

// TOP
#ifndef FCPP_NEW_LEXER_INC
#define FCPP_NEW_LEXER_INC
#include "../4cpp_lexer_types.h"
#define lexer_link static
// Binary search over a position-sorted token stack for the token that
// "covers" the absolute file position pos.  A token covers the half-open
// range [its start, next token's start) -- i.e. its own text plus the
// whitespace gap after it.  Sets in_whitespace when pos falls in that gap
// (or before/after all tokens) rather than inside the token's text.
// Returns token_index == -1 with in_whitespace == 1 for an empty stack.
lexer_link Cpp_Get_Token_Result
cpp_get_token(Cpp_Token_Stack *token_stack, int pos){
Cpp_Get_Token_Result result = {};
Cpp_Token *token_array = token_stack->tokens;
Cpp_Token *token = 0;
int first = 0;
int count = token_stack->count;
int last = count;
int this_start = 0, next_start = 0;
if (count > 0){
for (;;){
// Probe the midpoint of [first, last).
result.token_index = (first + last)/2;
token = token_array + result.token_index;
this_start = token->start;
if (result.token_index + 1 < count){
next_start = (token + 1)->start;
}
else{
// Final token: its covered range ends at the end of its own text.
next_start = this_start + token->size;
}
if (this_start <= pos && pos < next_start){
break;
}
else if (pos < this_start){
last = result.token_index;
}
else{
first = result.token_index + 1;
}
if (first == last){
// Empty range: settle on `first`.  Invariant note: when first < count,
// this exit is only reachable via the `last = token_index` branch
// (the `first = token_index + 1` branch would contradict pos <
// token_array[last].start), so `token` still equals
// token_array + first and the whitespace check below is consistent;
// when first == count, `token` is not used below.
result.token_index = first;
break;
}
}
if (result.token_index == count){
// pos lies past every token: clamp to the last token and report
// trailing whitespace.
--result.token_index;
result.in_whitespace = 1;
}
else{
if (token->start + token->size <= pos){
// pos is in the gap between this token's text and the next token.
result.in_whitespace = 1;
}
}
}
else{
// No tokens at all.
result.token_index = -1;
result.in_whitespace = 1;
}
return(result);
}
// Adds shift_amount (which may be negative) to the start offset of every
// token in the stack from index from_token_i through the last token.
// Used to re-anchor token positions after an edit earlier in the file.
lexer_link void
cpp_shift_token_starts(Cpp_Token_Stack *stack, int from_token_i, int shift_amount){
    int token_count = stack->count;
    for (int i = from_token_i; i < token_count; ++i){
        stack->tokens[i].start += shift_amount;
    }
}
// States of the character-at-a-time lexing state machine in
// cpp_lex_nonalloc.
enum Lex_State{
LS_default,               // between tokens; also handles single-char operators
LS_comment_pre,           // saw '/', next char decides // vs /* */ (or neither)
LS_comment,               // inside a // line comment, ends at '\n'
LS_comment_block,         // inside a /* */ block comment
LS_comment_block_ending,  // saw '*' inside a block comment; '/' would end it
LS_dot,                   // reserved -- transitions currently #if 0'd out
LS_less,                  // reserved -- transitions currently #if 0'd out
LS_more,                  // reserved -- transitions currently #if 0'd out
};
// Working/result data for cpp_lex_nonalloc.
struct Lex_Data{
int token_start;  // absolute position of the first character of the current token
int token_end;    // absolute position of the character that terminated the last emitted token
int completed;    // 1 when the whole chunk was consumed; 0 if the token array filled first
};
// Lex one chunk of a C++ file into token_stack_out without allocating.
//
// chunk:             text of this chunk.
// file_absolute_pos: absolute file position of chunk[0]; token starts are
//                    stored as absolute positions.
// size:              number of bytes in chunk.
// token_stack_out:   receives tokens; count is advanced, max_count respected.
//
// Returns a Lex_Data whose `completed` flag is set only when the whole
// chunk was consumed; if the token array fills first, completed stays 0.
//
// NOTE(review): work-in-progress lexer -- only comments and a handful of
// single-character operators are recognized, and lexer state is not carried
// across chunk boundaries, so a token straddling two chunks would be
// mis-lexed.  Confirm before relying on multi-chunk input.
//
// BUGFIX: single-character operator tokens were emitted with size == 0
// (the emit path stepped back onto the operator but never advanced past it
// before computing size); they now get size 1, consistent with how
// cpp_get_token uses token->size for range math.
lexer_link Lex_Data
cpp_lex_nonalloc(char *chunk, int file_absolute_pos, int size, Cpp_Token_Stack *token_stack_out){
    Cpp_Token *out_tokens = token_stack_out->tokens;
    int token_i = token_stack_out->count;
    int max_token_i = token_stack_out->max_count;

    Cpp_Token token = {};

    int pos = file_absolute_pos;
    int end_pos = size + file_absolute_pos;

    unsigned short state = LS_default;
    unsigned short pp_state = 0;

    Lex_Data lex_data = {};

    int emit_token = 0;
    char c;

    // Rebase the pointer so chunk[pos] can be indexed directly with
    // absolute file positions.
    chunk -= file_absolute_pos;

    for (; pos < end_pos && token_i < max_token_i; ++pos){
        // Skip whitespace preceding the next token.
        for (; pos < end_pos;){
            c = chunk[pos++];
            if (!(c == ' ' || c == '\n' || c == '\t' || c == '\r' || c == '\f' || c == '\v')) break;
        }
        // Step back onto the first non-whitespace character (or the chunk's
        // last character when it ended in whitespace).
        --pos;

        lex_data.token_start = pos;
        state = LS_default;
        emit_token = 0;

        // Drive the state machine until a token completes or input runs out.
        for (; emit_token == 0 && pos < end_pos;){
            c = chunk[pos++];
            switch (state){
            case LS_default:
                switch (c){
                case '/': state = LS_comment_pre; break;

                // `type` is intentionally unused here; the token type is
                // assigned from the character itself in the emit switch below.
#define OperCase(op,type) case op: emit_token = 1; break;
                OperCase('{', CPP_TOKEN_BRACE_OPEN);
                OperCase('}', CPP_TOKEN_BRACE_CLOSE);

                OperCase('[', CPP_TOKEN_BRACKET_OPEN);
                OperCase(']', CPP_TOKEN_BRACKET_CLOSE);

                OperCase('(', CPP_TOKEN_PARENTHESE_OPEN);
                OperCase(')', CPP_TOKEN_PARENTHESE_CLOSE);

                OperCase('~', CPP_TOKEN_TILDE);
                OperCase(',', CPP_TOKEN_COMMA);
                OperCase('?', CPP_TOKEN_TERNARY_QMARK);
#undef OperCase

#if 0
                case '.': state = LS_dot; break;
                case '<': state = LS_less; break;
                case '>': state = LS_more; break;
#endif
                }
                break;

            case LS_dot:
                break;

            case LS_less:
                break;

            case LS_more:
                break;

            case LS_comment_pre:
                switch (c){
                case '/': state = LS_comment; break;
                case '*': state = LS_comment_block; break;
                }
                break;

            case LS_comment:
                switch (c){
                case '\n': emit_token = 1; break;
                }
                break;

            case LS_comment_block:
                switch (c){
                case '*': state = LS_comment_block_ending; break;
                }
                break;

            case LS_comment_block_ending:
                switch (c){
                case '*': state = LS_comment_block_ending; break;
                case '/': emit_token = 1; break;
                default: state = LS_comment_block; break;
                }
                break;
            }
        }

        if (emit_token){
            // Step back onto the character that terminated the token;
            // token_end remembers where scanning must resume.
            --pos;
            lex_data.token_end = pos;

            switch (state){
            case LS_default:
                // Single-character operator token.
                switch (chunk[pos]){
#define OperCase(op,t) case op: token.type = t; break;
                OperCase('{', CPP_TOKEN_BRACE_OPEN);
                OperCase('}', CPP_TOKEN_BRACE_CLOSE);

                OperCase('[', CPP_TOKEN_BRACKET_OPEN);
                OperCase(']', CPP_TOKEN_BRACKET_CLOSE);

                OperCase('(', CPP_TOKEN_PARENTHESE_OPEN);
                OperCase(')', CPP_TOKEN_PARENTHESE_CLOSE);

                OperCase('~', CPP_TOKEN_TILDE);
                OperCase(',', CPP_TOKEN_COMMA);
                OperCase('?', CPP_TOKEN_TERNARY_QMARK);
#undef OperCase
                }
                token.flags = CPP_TFLAG_IS_OPERATOR;
                // BUGFIX: step one past the operator so the size computation
                // below yields 1 instead of 0 (the comment branch gets the
                // same effect from its trailing ++pos).  pos is restored
                // from token_end afterward, so the resume position is
                // unaffected.
                ++pos;
                break;

            case LS_comment: case LS_comment_block_ending:
                token.type = CPP_TOKEN_COMMENT;
                token.flags = 0;
                // Trim trailing whitespace (e.g. the '\n' that ended a line
                // comment) off the token, then land one past the last kept
                // character.
                c = chunk[pos];
                while (c == ' ' || c == '\n' || c == '\t' || c == '\r' || c == '\v' || c == '\f'){
                    --pos;
                    c = chunk[pos];
                }
                ++pos;
                break;
            }

            token.start = lex_data.token_start;
            token.size = pos - lex_data.token_start;
            token.state_flags = pp_state;

            out_tokens[token_i++] = token;

            // Resume at the terminator; the outer loop's ++pos steps past it.
            pos = lex_data.token_end;
        }
    }

    token_stack_out->count = token_i;

    // completed stays 0 when the token array filled before the chunk ended.
    if (pos == end_pos) lex_data.completed = 1;
    return(lex_data);
}
#endif
// BOTTOM