/*
4coder_metadata_generator.cpp - A preprocessor program for generating a list of commands and their descriptions.

TYPE: 'code-preprocessor'
*/

// TOP

#include "4coder_lib/4coder_mem.h"
#define FSTRING_IMPLEMENTATION
#include "4coder_lib/4coder_string.h"
#include "4cpp/4cpp_lexer.h"

#include <stdlib.h>
#include <stdio.h>
#include <stdint.h>

typedef int32_t bool32;

//// WINDOWS BEGIN ////
#define UNICODE
#include <Windows.h>
typedef TCHAR Filename_Character;
//// WINDOWS END ////

// One entry of a directory listing produced by get_file_list.
struct File_Info{
    // Name of the entry without its directory part, null terminated.
    Filename_Character *name;
    // Number of characters in name, not counting the terminator.
    int32_t len;
    // Non-zero when the entry is a directory.
    bool32 is_folder;
};

// The result of listing one directory with get_file_list.
struct File_List{
    // Array of count entries.
    File_Info *info;
    // Number of entries in info.
    int32_t count;
    // Number of characters in final_name, not counting the terminator.
    int32_t final_length;
    // Null terminated path of the listed directory as resolved by get_file_list.
    Filename_Character final_name[4096];
};

// Lists the sub folders and code files found directly inside dir.
static File_List
get_file_list(Partition *part, Filename_Character *dir);

// Converts a char string into a filename string allocated on part.
static Filename_Character*
encode(Partition *part, char *str);

// Converts a filename string of len characters into a char string allocated on part.
static char*
unencode(Partition *part, Filename_Character *str, int32_t len);

//// WINDOWS BEGIN ////
// Reports whether a file name ends with one of the recognized source
// extensions: .cpp, .hpp, .cc, .h or .c.
//
// name: the file name to inspect.
// len:  number of characters in name.
//
// A name only qualifies when at least one character precedes the extension.
static bool32
is_code_file(Filename_Character *name, int32_t len){
    static const char *known_extensions[] = {".cpp", ".hpp", ".cc", ".h", ".c"};
    int32_t known_count = (int32_t)(sizeof(known_extensions)/sizeof(known_extensions[0]));
    
    for (int32_t k = 0; k < known_count; ++k){
        const char *ext = known_extensions[k];
        
        int32_t ext_len = 0;
        while (ext[ext_len] != 0){
            ++ext_len;
        }
        
        // The name has to be strictly longer than the extension itself.
        if (len <= ext_len){
            continue;
        }
        
        Filename_Character *tail = name + (len - ext_len);
        int32_t matched = 0;
        while (matched < ext_len && tail[matched] == ext[matched]){
            ++matched;
        }
        
        if (matched == ext_len){
            return(true);
        }
    }
    
    return(false);
}

// Lists the sub folders and code files found directly inside a directory.
//
// part: partition that receives the entry names and the File_Info array.
// dir:  path of the directory to list.
//
// Returns a File_List whose final_name holds the resolved path of dir and
// whose info array has one entry per sub folder or code file (see
// is_code_file).  Entries whose name starts with '.' are left out.
//
// The directory is enumerated twice: once to measure how much memory the
// listing needs, once to fill it in.  If the second pass finds more than the
// first pass measured, the partition is rolled back and a zeroed list is
// returned.  Every other failure prints a message to stdout and exits.
static File_List
get_file_list(Partition *part, Filename_Character *dir){
    if (part == 0){
        fprintf(stdout, "fatal error: NULL part passed to %s\n", __FUNCTION__);
        exit(1);
    }
    if (dir == 0){
        fprintf(stdout, "fatal error: NULL dir passed to %s\n", __FUNCTION__);
        exit(1);
    }
    
    File_List list = {0};
    Temp_Memory part_reset = begin_temp_memory(part);
    
    HANDLE dir_handle =
        CreateFile(dir,
                   FILE_LIST_DIRECTORY,
                   FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE,
                   0,
                   OPEN_EXISTING,
                   FILE_FLAG_BACKUP_SEMANTICS | FILE_FLAG_OVERLAPPED,
                   0);
    
    if (dir_handle == INVALID_HANDLE_VALUE){
        fprintf(stdout, "fatal error: could not open directory handle\n");
        exit(1);
    }
    
    // NOTE: GetFinalPathNameByHandle expects the buffer capacity in
    // characters, not in bytes.  On success it returns the length without the
    // terminator, on a too small buffer it returns the required capacity, and
    // on any other failure it returns zero.
    Filename_Character final_name[4096];
    DWORD final_name_cap = (DWORD)(sizeof(final_name)/sizeof(final_name[0]));
    DWORD final_length = GetFinalPathNameByHandle(dir_handle, final_name, final_name_cap, 0);
    CloseHandle(dir_handle);
    
    if (final_length == 0){
        fprintf(stdout, "fatal error: could not resolve the path name of the directory\n");
        exit(1);
    }
    if (final_length >= final_name_cap){
        fprintf(stdout, "fatal error: path name too long for local buffer\n");
        exit(1);
    }
    if (final_length < 4){
        fprintf(stdout, "fatal error: resolved path name is missing its prefix\n");
        exit(1);
    }
    
    // Drop the four character "\\?\" prefix of the resolved path, then append
    // "\*" to form the search pattern.  Removing four characters always leaves
    // room for the two appended characters and the terminator.
    final_length -= 4;
    memmove(final_name, final_name + 4, final_length*sizeof(*final_name));
    final_name[final_length] = '\\';
    final_name[final_length + 1] = '*';
    final_name[final_length + 2] = 0;
    
    // The list keeps the resolved path without the search pattern.
    list.final_length = final_length;
    memcpy(list.final_name, final_name, list.final_length*sizeof(*final_name));
    list.final_name[list.final_length] = 0;
    
    // First pass: measure.
    WIN32_FIND_DATA find_data = {0};
    HANDLE search = FindFirstFile(final_name, &find_data);
    if (search == INVALID_HANDLE_VALUE){
        fprintf(stdout, "fatal error: could not begin a file search\n");
        exit(1);
    }
    
    int32_t character_count = 0;
    int32_t file_count = 0;
    BOOL more_files = true;
    do{
        Filename_Character *name = &find_data.cFileName[0];
        
        int32_t size = 0;
        for(;name[size];++size);
        
        uint32_t attribs = find_data.dwFileAttributes;
        bool32 is_folder = ((attribs & FILE_ATTRIBUTE_DIRECTORY) != 0);
        
        if (name[0] != '.' && (is_folder || is_code_file(name, size))){
            ++file_count;
            character_count += size + 1;
        }
        
        more_files = FindNextFile(search, &find_data);
    }while(more_files);
    FindClose(search);
    
    // Nothing to store: skip the allocation so that an empty request is never
    // confused with running out of memory.
    if (file_count == 0){
        return(list);
    }
    
    // One allocation holds the names first (rounded up to a multiple of eight
    // bytes) and the File_Info array after them.
    int32_t rounded_char_size = (character_count*sizeof(Filename_Character) + 7)&(~7);
    int32_t memsize = rounded_char_size + file_count*sizeof(File_Info);
    void *mem = push_array(part, uint8_t, memsize);
    if (mem == 0){
        fprintf(stdout, "fatal error: not enough memory on the partition for a file list.\n");
        exit(1);
    }
    
    Filename_Character *char_ptr = (Filename_Character*)mem;
    File_Info *info_ptr = (File_Info*)((uint8_t*)mem + rounded_char_size);
    
    Filename_Character *char_ptr_end = (Filename_Character*)info_ptr;
    File_Info *info_ptr_end = info_ptr + file_count;
    
    File_Info *info_ptr_base = info_ptr;
    
    // Second pass: fill.
    search = FindFirstFile(final_name, &find_data);
    if (search == INVALID_HANDLE_VALUE){
        fprintf(stdout, "fatal error: could not restart a file search\n");
        exit(1);
    }
    
    int32_t adjusted_file_count = 0;
    more_files = true;
    do{
        Filename_Character *name = &find_data.cFileName[0];
        
        int32_t size = 0;
        for(;name[size]!=0;++size);
        
        uint32_t attribs = find_data.dwFileAttributes;
        bool32 is_folder = ((attribs & FILE_ATTRIBUTE_DIRECTORY) != 0);
        
        if (name[0] != '.' && (is_folder || is_code_file(name, size))){
            // The directory changed between the two passes and no longer
            // fits the measured memory: give up on this listing.
            if (info_ptr + 1 > info_ptr_end || char_ptr + size + 1 > char_ptr_end){
                memset(&list, 0, sizeof(list));
                end_temp_memory(part_reset);
                FindClose(search);
                return(list);
            }
            
            info_ptr->name = char_ptr;
            info_ptr->len = size;
            info_ptr->is_folder = is_folder;
            
            memmove(char_ptr, name, size*sizeof(*name));
            char_ptr[size] = 0;
            
            char_ptr += size + 1;
            ++info_ptr;
            ++adjusted_file_count;
        }
        
        more_files = FindNextFile(search, &find_data);
    }while(more_files);
    FindClose(search);
    
    list.info = info_ptr_base;
    list.count = adjusted_file_count;
    
    return(list);
}

// Converts a char string into a filename string allocated on part.
//
// part: partition the result is allocated on.
// str:  null terminated string to convert.
//
// Double quote characters are dropped from the result.  Running out of memory
// prints a message to stdout and exits.
static Filename_Character*
encode(Partition *part, char *str){
    int32_t length = 0;
    while (str[length] != 0){
        ++length;
    }
    
    Filename_Character *result = push_array(part, Filename_Character, length + 1);
    push_align(part, 8);
    
    if (result == 0){
        fprintf(stdout, "fatal error: ran out of memory encoding string to filename\n");
        exit(1);
    }
    
    // Copy everything up to and including the terminator, except quotes.
    Filename_Character *write = result;
    char *last = str + length;
    for (char *read = str; read <= last; ++read){
        if (*read == '"'){
            continue;
        }
        *write++ = *read;
    }
    
    return(result);
}

// Converts a filename string into a char string allocated on part.
//
// part: partition the result is allocated on.
// str:  filename string to convert.
// len:  number of characters in str; the character at str[len] is converted
//       as well.
//
// Returns the converted string, or null when a character with a value above
// 127 is found, in which case the allocation is released again.  Running out
// of memory prints a message to stdout and exits.
static char*
unencode(Partition *part, Filename_Character *str, int32_t len){
    Temp_Memory restore_point = begin_temp_memory(part);
    char *result = push_array(part, char, len + 1);
    push_align(part, 8);
    
    if (result == 0){
        fprintf(stdout, "fatal error: ran out of memory unencoding string to filename\n");
        exit(1);
    }
    
    for (int32_t i = 0; i <= len; ++i){
        Filename_Character c = str[i];
        if (c > 127){
            end_temp_memory(restore_point);
            return(0);
        }
        result[i] = (char)c;
    }
    
    return(result);
}
//// WINDOWS END ////

// Loads the whole contents of a file into memory allocated on part.
//
// part: partition the contents are allocated on.
// name: path of the file to load.
//
// Returns the contents followed by a null terminator.  When the file cannot
// be opened or measured the returned String is zeroed (str is null).  A read
// that delivers fewer bytes than measured yields a String holding only the
// bytes that were read.  Running out of memory prints a message to stdout and
// exits.
static String
file_dump(Partition *part, char *name){
    String text = {0};
    
    FILE *file = fopen(name, "rb");
    if (file != 0){
        long file_size = -1;
        if (fseek(file, 0, SEEK_END) == 0){
            file_size = ftell(file);
        }
        
        // The size has to be known and small enough for size + 1 to be
        // representable in the 32 bit fields of String.
        if (file_size >= 0 && file_size < 0x7FFFFFFF && fseek(file, 0, SEEK_SET) == 0){
            int32_t capacity = (int32_t)file_size + 1;
            char *buffer = push_array(part, char, capacity);
            if (buffer == 0){
                fprintf(stdout, "fatal error: not enough memory to load file %s\n", name);
                exit(1);
            }
            
            text.str = buffer;
            text.memory_size = capacity;
            text.size = (int32_t)fread(buffer, 1, (size_t)file_size, file);
            terminate_with_null(&text);
        }
        
        fclose(file);
    }
    
    return(text);
}

// Prints a diagnostic of the form "<source>:<line>:<column>: <message>" to
// stdout and flushes it.
//
// source_name: name printed in front of the diagnostic.
// text:        the text pos refers into.
// pos:         character position of the problem; clamped to [0, text.size].
// msg:         the message to print.
//
// Lines and columns are counted from one; only '\n' starts a new line.
static void
error(char *source_name, String text, int32_t pos, char *msg){
    int32_t clamped_pos = pos;
    if (clamped_pos < 0){
        clamped_pos = 0;
    }
    if (clamped_pos > text.size){
        clamped_pos = text.size;
    }
    
    int32_t line = 1;
    int32_t column = 1;
    for (int32_t i = 0; i < clamped_pos; ++i){
        if (text.str[i] == '\n'){
            line += 1;
            column = 1;
        }
        else{
            column += 1;
        }
    }
    
    fprintf(stdout, "%s:%d:%d: %s\n", source_name, line, column, msg);
    fflush(stdout);
}

// A cursor over the lexed tokens of one source text.
struct Reader{
    // Name printed in front of diagnostics about this text.
    char *source_name;
    // The text that token positions refer into.
    String text;
    // The tokens being read.
    Cpp_Token_Array tokens;
    // Current read position within tokens.
    Cpp_Token *ptr;
};

// Builds a Reader positioned at the first token of array.
//
// array:       the tokens to read.
// source_name: name used when reporting diagnostics.
// text:        the text the tokens refer into.
static Reader
make_reader(Cpp_Token_Array array, char *source_name, String text){
    Reader result = {0};
    result.source_name = source_name;
    result.text = text;
    result.tokens = array;
    result.ptr = result.tokens.tokens;
    return(result);
}

// Steps the reader backwards to the closest earlier token that is neither a
// comment nor junk and returns that token.
//
// The reader is left pointing at the returned token.  When there is no such
// token the reader is left at the first token and a zeroed token is returned.
static Cpp_Token
prev_token(Reader *reader){
    Cpp_Token *first = reader->tokens.tokens;
    Cpp_Token *one_past_last = first + reader->tokens.count;
    Cpp_Token result = {0};
    
    // A position beyond the array is pulled back to its end first.
    if (reader->ptr > one_past_last){
        reader->ptr = one_past_last;
    }
    
    while (reader->ptr > first){
        --reader->ptr;
        result = *reader->ptr;
        if (result.type != CPP_TOKEN_COMMENT && result.type != CPP_TOKEN_JUNK){
            return(result);
        }
    }
    
    // Ran out of tokens.
    reader->ptr = first;
    memset(&result, 0, sizeof(result));
    return(result);
}

// Returns the next token that is neither a comment nor junk and moves the
// reader past it.
//
// When there is no such token the reader is left at the end of the array and
// the result is a zeroed token whose start is the size of the text.
static Cpp_Token
get_token(Reader *reader){
    Cpp_Token *first = reader->tokens.tokens;
    Cpp_Token *one_past_last = first + reader->tokens.count;
    Cpp_Token result = {0};
    
    // A position before the array is pushed forward to its start first.
    if (reader->ptr < first){
        reader->ptr = first;
    }
    
    while (reader->ptr < one_past_last){
        result = *reader->ptr;
        ++reader->ptr;
        if (result.type != CPP_TOKEN_COMMENT && result.type != CPP_TOKEN_JUNK){
            return(result);
        }
    }
    
    // Ran out of tokens.
    reader->ptr = one_past_last;
    memset(&result, 0, sizeof(result));
    result.start = reader->text.size;
    return(result);
}

// Returns the token that the next call to get_token would return, without
// moving the reader past it.
//
// Comment and junk tokens are skipped, exactly as get_token skips them, so
// that the peeked token and its position always describe the token that is
// read next.  When no such token is left the result is a zeroed token whose
// start is the size of the text.
//
// The only change made to the reader is pulling a position that lies before
// the token array up to the start of the array.
static Cpp_Token
peek_token(Reader *reader){
    Cpp_Token result = {0};
    
    if (reader->ptr < reader->tokens.tokens){
        reader->ptr = reader->tokens.tokens;
    }
    
    Cpp_Token *one_past_last = reader->tokens.tokens + reader->tokens.count;
    Cpp_Token *scan = reader->ptr;
    while (scan < one_past_last &&
           (scan->type == CPP_TOKEN_COMMENT || scan->type == CPP_TOKEN_JUNK)){
        ++scan;
    }
    
    if (scan < one_past_last){
        result = *scan;
    }
    else{
        result.start = reader->text.size;
    }
    
    return(result);
}

// Returns the start position of the token reported by peek_token.
static int32_t
peek_pos(Reader *reader){
    return(peek_token(reader).start);
}

// Prints a diagnostic for position pos of the text the reader is reading,
// using the reader's source name.
static void
error(Reader *reader, int32_t pos, char *msg){
    char *source_name = reader->source_name;
    String source_text = reader->text;
    error(source_name, source_text, pos, msg);
}

// A remembered reader position, made by begin_temp_read and put back by
// end_temp_read.
struct Temp_Read{
    // The reader the position belongs to.
    Reader *reader;
    // The value of reader->ptr at the time of begin_temp_read.
    Cpp_Token *pos;
};

// Remembers the current position of reader so that end_temp_read can return
// to it later.
static Temp_Read
begin_temp_read(Reader *reader){
    Temp_Read saved = {reader, reader->ptr};
    return(saved);
}

// Moves the reader recorded in temp back to the position recorded in temp.
static void
end_temp_read(Temp_Read temp){
    Reader *reader = temp.reader;
    reader->ptr = temp.pos;
}

// Returns the part of text that token covers.
static String
token_str(String text, Cpp_Token token){
    return(substr(text, token.start, token.size));
}

// Reads one token and checks that it is the identifier str.
//
// Returns true on a match.  Otherwise reports "expected to find '<str>'" at
// the position of the token that was read and returns false.
static bool32
require_key_identifier(Reader *reader, char *str){
    Cpp_Token token = get_token(reader);
    
    if (token.type == CPP_TOKEN_IDENTIFIER &&
        match(token_str(reader->text, token), str)){
        return(true);
    }
    
    char message_space[1024];
    String message = make_fixed_width_string(message_space);
    copy(&message, "expected to find '");
    append(&message, str);
    append(&message, "'");
    terminate_with_null(&message);
    error(reader, token.start, message.str);
    
    return(false);
}

// Reads one token and checks that it is an opening parenthesis.
//
// Returns true when it is; otherwise reports the problem at the position of
// the token that was read and returns false.
static bool32
require_open_parenthese(Reader *reader){
    Cpp_Token token = get_token(reader);
    bool32 success = (token.type == CPP_TOKEN_PARENTHESE_OPEN);
    
    if (!success){
        error(reader, token.start, "expected to find '('");
    }
    
    return(success);
}

// Reads one token and checks that it is a closing parenthesis.
//
// Returns true when it is; otherwise reports the problem at the position of
// the token that was read and returns false.
static bool32
require_close_parenthese(Reader *reader){
    Cpp_Token token = get_token(reader);
    
    if (token.type != CPP_TOKEN_PARENTHESE_CLOSE){
        error(reader, token.start, "expected to find ')'");
        return(false);
    }
    
    return(true);
}

// Reads one token and checks that it is a #define directive.
//
// Returns true when it is; otherwise reports the problem at the position of
// the token that was read and returns false.
static bool32
require_define(Reader *reader){
    Cpp_Token token = get_token(reader);
    bool32 success = (token.type == CPP_PP_DEFINE);
    
    if (!success){
        error(reader, token.start, "expected to find '#define'");
    }
    
    return(success);
}

// Reads one token and, when it is an identifier, stores its text in str_out.
//
// Returns true when an identifier was read.  Otherwise str_out is left
// untouched, the problem is reported at the position of the token that was
// read, and false is returned.
static bool32
extract_identifier(Reader *reader, String *str_out){
    Cpp_Token token = get_token(reader);
    
    if (token.type != CPP_TOKEN_IDENTIFIER){
        error(reader, token.start, "expected to find an identifier");
        return(false);
    }
    
    *str_out = token_str(reader->text, token);
    return(true);
}

// Reads one token and, when it is a string constant, stores its text in
// str_out.
//
// Returns true when a string constant was read.  Otherwise str_out is left
// untouched, the problem is reported at the position of the token that was
// read, and false is returned.
static bool32
extract_string(Reader *reader, String *str_out){
    Cpp_Token token = get_token(reader);
    
    if (token.type != CPP_TOKEN_STRING_CONSTANT){
        error(reader, token.start, "expected to find a string literal");
        return(false);
    }
    
    *str_out = token_str(reader->text, token);
    return(true);
}

// Parses a documented command of the form
//   CUSTOM_COMMAND_SIG(<name>) CUSTOM_DOC(<string literal>)
// starting at the reader's current position.
//
// Returns true when the whole form was read.  Parsing stops at the first
// piece that does not fit; that piece is reported by the helper that expected
// it and false is returned.
static bool32
parse_documented_command(Partition *part, Reader *reader){
    String command_name = {0};
    String doc_string = {0};
    
    // Each step only runs when all the steps before it succeeded.
    bool32 success =
        // The command's name
        require_key_identifier(reader, "CUSTOM_COMMAND_SIG") &&
        require_open_parenthese(reader) &&
        extract_identifier(reader, &command_name) &&
        require_close_parenthese(reader) &&
        // The command's doc string
        require_key_identifier(reader, "CUSTOM_DOC") &&
        require_open_parenthese(reader) &&
        extract_string(reader, &doc_string) &&
        require_close_parenthese(reader);
    
    // TODO(allen): Store into data structure for codegen.
    
    return(success);
}

// Parses an alias of the form
//   #define <name> CUSTOM_ALIAS(<target>)
// starting at the reader's current position.
//
// Returns true when the whole form was read, after printing the positions of
// the alias name and of the target through error.  Parsing stops at the first
// piece that does not fit; that piece is reported by the helper that expected
// it and false is returned.
static bool32
parse_alias(Partition *part, Reader *reader){
    String alias_name = {0};
    String target_name = {0};
    bool32 success = false;
    
    if (require_define(reader)){
        // The alias's name
        int32_t alias_pos = peek_pos(reader);
        if (extract_identifier(reader, &alias_name) &&
            require_key_identifier(reader, "CUSTOM_ALIAS") &&
            require_open_parenthese(reader)){
            // The alias's target
            int32_t target_pos = peek_pos(reader);
            if (extract_identifier(reader, &target_name) &&
                require_close_parenthese(reader)){
                error(reader, alias_pos, "name of an alias");
                error(reader, target_pos, "name of a potential");
                success = true;
            }
        }
    }
    
    return(success);
}

// Lexes text and scans the tokens for documented commands and aliases.
//
// part:        passed through to the parsing routines.
// source_name: name used when reporting diagnostics.
// text:        the source text to scan.
//
// A CUSTOM_DOC identifier outside of a preprocessor body triggers a look back
// over at most five tokens for CUSTOM_COMMAND_SIG, from which
// parse_documented_command is run.  A CUSTOM_ALIAS identifier triggers a look
// back for a #define exactly three tokens back, from which parse_alias is
// run.  Whenever the look back or the parse fails, the reader returns to just
// after the identifier that triggered it and scanning continues.
static void
parse_text(Partition *part, char *source_name, String text){
    Cpp_Token_Array array = cpp_make_token_array(1024);
    cpp_lex_file(text.str, text.size, &array);
    
    Reader reader_ = make_reader(array, source_name, text);
    Reader *reader = &reader_;
    
    Cpp_Token token = {0};
    do{
        token = get_token(reader);
        if (token.type != CPP_TOKEN_IDENTIFIER){
            continue;
        }
        
        String lexeme = token_str(text, token);
        bool32 outside_preproc_body = ((token.flags & CPP_TFLAG_PP_BODY) == 0);
        
        if (outside_preproc_body && match(lexeme, "CUSTOM_DOC")){
            Temp_Read restore = begin_temp_read(reader);
            
            // Walk backwards looking for the start of the command.
            bool32 at_command_start = false;
            int32_t steps = 0;
            while (steps < 5 && !at_command_start){
                Cpp_Token back = prev_token(reader);
                ++steps;
                if (back.type == CPP_TOKEN_IDENTIFIER &&
                    match(token_str(text, back), "CUSTOM_COMMAND_SIG")){
                    at_command_start = true;
                }
                else if (back.type == 0){
                    break;
                }
            }
            
            if (!(at_command_start && parse_documented_command(part, reader))){
                end_temp_read(restore);
            }
        }
        else if (match(lexeme, "CUSTOM_ALIAS")){
            Temp_Read restore = begin_temp_read(reader);
            
            // The #define only counts when it is found on the third step back.
            bool32 at_define = false;
            for (int32_t steps = 0; steps < 3; ++steps){
                Cpp_Token back = prev_token(reader);
                if (back.type == CPP_PP_DEFINE){
                    at_define = (steps == 2);
                    break;
                }
                if (back.type == 0){
                    break;
                }
            }
            
            if (!(at_define && parse_alias(part, reader))){
                end_temp_read(restore);
            }
        }
    }while (token.type != 0);
    
    cpp_free_token_array(array);
}

// Loads one source file and scans it with parse_text.
//
// part:  partition used for the converted name and the file contents.
// name_: path of the file as a filename string.
// len:   number of characters in name_.
//
// A file whose name cannot be converted by unencode, or whose contents cannot
// be loaded, is skipped with a warning on stdout.
static void
parse_file(Partition *part, Filename_Character *name_, int32_t len){
    char *name = unencode(part, name_, len);
    if (name == 0){
        if (sizeof(*name_) == 2){
            fprintf(stdout, "warning: could not unencode file name %ls - file skipped\n", name_);
        }
        else{
            fprintf(stdout, "warning: could not unencode file name %s - file skipped\n", name_);
        }
        return;
    }
    
    String text = file_dump(part, name);
    // file_dump leaves str null when it could not load the file; there is
    // nothing to lex in that case.
    if (text.str == 0){
        fprintf(stdout, "warning: could not load file %s - file skipped\n", name);
        return;
    }
    
    parse_text(part, name, text);
}

// Scans every code file found directly inside a directory and, when asked to,
// the code files of all of its sub folders.
//
// part:      partition used for listings, names and file contents.
// root:      path of the directory to scan.
// recursive: when non-zero, sub folders are scanned as well; when zero they
//            are ignored.
//
// Running out of memory prints a message to stdout and exits.
static void
parse_files_in_directory(Partition *part, Filename_Character *root, bool32 recursive){
    File_List list = get_file_list(part, root);
    for (int32_t i = 0; i < list.count; ++i){
        File_Info *info = &list.info[i];
        
        // Sub folders are only of interest to a recursive scan.
        if (info->is_folder && !recursive){
            continue;
        }
        
        // full_name = <resolved directory> '\' <entry name>
        int32_t full_name_len = list.final_length + 1 + info->len;
        Filename_Character *full_name = push_array(part, Filename_Character, full_name_len + 1);
        push_align(part, 8);
        
        if (full_name == 0){
            fprintf(stdout, "fatal error: not enough memory to recurse to sub directory\n");
            exit(1);
        }
        
        memmove(full_name, list.final_name, list.final_length*sizeof(*full_name));
        full_name[list.final_length] = '\\';
        memmove(full_name + list.final_length + 1, info->name, info->len*sizeof(*full_name));
        full_name[full_name_len] = 0;
        
        if (!info->is_folder){
            parse_file(part, full_name, full_name_len);
        }
        else{
            parse_files_in_directory(part, full_name, recursive);
        }
    }
}

// Prints the command line usage to stdout and exits with status zero.
//
// The program name shown is argv[0] when there is one, and
// "metadata_generator" otherwise.
static void
show_usage(int argc, char **argv){
    char default_name[] = "metadata_generator";
    char *program_name = (argc < 1)?(default_name):(argv[0]);
    
    fprintf(stdout, "usage:\n%s [-R] <root-directory> [<root-directory2> ...]\n", program_name);
    exit(0);
}

// Entry point: scans every root directory named on the command line.
//
// usage: <program> [-R] <root-directory> [<root-directory2> ...]
//
// When the first argument is -R it is passed on as the recursive flag and
// the root directories start at the second argument.  Without any root
// directory the usage is printed and the program exits.
int
main(int argc, char **argv){
    if (argc < 2){
        show_usage(argc, argv);
    }
    
    bool32 recursive = match(argv[1], "-R");
    if (recursive && argc < 3){
        show_usage(argc, argv);
    }
    
    // All working memory comes from one block handed to a partition.
    int32_t size = (256 << 20);
    void *mem = malloc(size);
    if (mem == 0){
        fprintf(stdout, "fatal error: could not allocate working memory\n");
        exit(1);
    }
    Partition part_ = make_part(mem, size);
    Partition *part = &part_;
    
    int32_t start_i = 1;
    if (recursive){
        start_i = 2;
    }
    
    for (int32_t i = start_i; i < argc; ++i){
        Filename_Character *root_name = encode(part, argv[i]);
        parse_files_in_directory(part, root_name, recursive);
    }
    
    free(mem);
    
    return(0);
}

// BOTTOM