diff --git a/src/defs.h b/src/defs.h index e2120ab..f3940d3 100644 --- a/src/defs.h +++ b/src/defs.h @@ -20,7 +20,6 @@ #define MAX_LOCALS 1500 #define MAX_FIELDS 64 #define MAX_FUNCS 512 -#define MAX_FUNC_TRIES 2160 #define MAX_TYPES 64 #define MAX_IR_INSTR 50000 #define MAX_BB_PRED 128 @@ -305,10 +304,18 @@ typedef struct { int value; } constant_t; +/* string-based hash map definitions (separate chaining: each bucket is a singly linked list) */ + +typedef struct node { + char *key; /* heap copy of the key string, made by new_node() */ + void *val; /* value pointer supplied by the caller; may be NULL */ + struct node *next; /* next node in the same bucket chain, or NULL */ +} node_t; + typedef struct { - int index; - int next[128]; -} trie_t; + int size; /* number of buckets */ + node_t **buckets; /* array of 'size' bucket chain heads */ +} hashmap_t; struct phi_operand { var_t *var; diff --git a/src/globals.c b/src/globals.c index 992bb78..65f7123 100644 --- a/src/globals.c +++ b/src/globals.c @@ -12,18 +12,7 @@ block_list_t BLOCKS; macro_t *MACROS; int macros_idx = 0; -/* the first element is reserved for global scope */ -func_t *FUNCS; -int funcs_idx = 1; - -/* FUNC_TRIES is used to improve the performance of the find_func function. - * Instead of searching through all functions and comparing their names, we can - * utilize the trie data structure to search for existing functions efficiently. - * The index starts from 1 because the first trie node represents an empty input - * string, and it is not possible to record a function with an empty name. - */ -trie_t *FUNC_TRIES; -int func_tries_idx = 1; +hashmap_t *FUNCS_MAP; /* function name -> func_t pointer, filled by add_func() */ type_t *TYPES; int types_idx = 0; @@ -72,72 +61,159 @@ char *elf_strtab; char *elf_section; /** - * insert_trie() - Inserts a new element into the trie structure. - * @trie: A pointer to the trie where the name will be inserted. - * @name: The name to be inserted into the trie. - * @funcs_index: The index of the pointer to the func_t. The index is recorded - * in a 1-indexed format. Because the first element of 'FUNCS' has been - * reserved, there is no need to shift it. - * Return: The index of the pointer to the func_t. + * hash_fnv1a() - hashes a string with FNV-1a hash function. + * The result may be negative.
+ * @key: The string to be hashed. * - * If the function has been inserted, the return value is the index of the - * function in FUNCS. Otherwise, the return value is the value of the parameter - * @funcs_index. + * @returns: The hash value of string. */ -int insert_trie(trie_t *trie, char *name, int funcs_index) -{ - char first_char; - int fc; - - while (1) { - first_char = *name; - fc = first_char; - if (!fc) { - if (!trie->index) - trie->index = funcs_index; - return trie->index; - } - if (!trie->next[fc]) { - /* FIXME: The func_tries_idx variable may exceed the maximum number, - * which can lead to a segmentation fault. This issue is affected by - * the number of functions and the length of their names. The proper - * way to handle this is to dynamically allocate a new element. - */ - trie->next[fc] = func_tries_idx++; - for (int i = 0; i < 128; i++) - FUNC_TRIES[trie->next[fc]].next[i] = 0; - FUNC_TRIES[trie->next[fc]].index = 0; - } - trie = &FUNC_TRIES[trie->next[fc]]; - name++; +int hash_fnv1a(char *key) +{ + int hash = 0x811c9dc5; /* 32-bit FNV offset basis */ + + for (; *key; key++) { + hash ^= *key; + hash *= 0x01000193; /* 32-bit FNV prime; NOTE(review): this int multiply can overflow */ + } + + return hash; +} + +/** + * hash_index() - hashes a string and converts the hash into a + * usable hashmap index. + * @size: The number of buckets, used as the modulus. Must not be 0. + * @key: The key string. Must not be NULL (it is dereferenced by hash_fnv1a()). + * + * @returns: The usable hashmap index. + */ +int hash_index(int size, char *key) +{ + int hash = hash_fnv1a(key), mask = hash >> 31; /* mask is 0 or -1, assuming 32-bit int and an arithmetic right shift; (hash ^ mask) - mask is then the absolute value */ + return ((hash ^ mask) - mask) % size; } /** - * find_trie() - search the index of the function name in the trie - * @trie: A pointer to the trie where the name will be searched. - * @name: The name to be searched. + * create_hashmap() - creates a hashmap on heap. + * @size: The initial bucket size of hashmap. * - * Return: The index of the pointer to the func_t. + * @returns: The pointer of created hashmap.
+ */ +hashmap_t *create_hashmap(int size) +{ + hashmap_t *map = malloc(sizeof(hashmap_t)); + map->size = size; + map->buckets = malloc(size * sizeof(node_t *)); + + for (int i = 0; i < map->size; i++) + map->buckets[i] = 0; + + return map; +} + +/** + * new_node() - creates a hashmap node on heap. + * @key: The key of node. Must not be NULL. + * @val: The value of node. Could be NULL. * - * 0 - the name not found. - * otherwise - the index of the founded index in the trie array. + * @returns: The pointer of created node. + */ +node_t *new_node(char *key, void *val) +{ + int len = strlen(key); + node_t *node = malloc(sizeof(node_t)); + node->key = calloc(len + 1, sizeof(char)); /* the node keeps its own copy of the key */ + strcpy(node->key, key); + node->val = val; + node->next = NULL; + return node; +} + +/** + * put_hashmap() - puts a key-value pair into given hashmap. + * FIXME: an existing key is not replaced; a duplicate node is appended + * to the end of its bucket chain and the old value is left untouched. + * @map: The hashmap to be put into. Must not be NULL. + * @key: The key string. Must not be NULL; it is hashed and copied. + * @val: The value pointer. May be NULL. This value's lifetime + * is held by hashmap. */ -int find_trie(trie_t *trie, char *name) -{ - char first_char; - int fc; - - while (1) { - first_char = *name; - fc = first_char; - if (!fc) - return trie->index; - if (!trie->next[fc]) - return 0; - trie = &FUNC_TRIES[trie->next[fc]]; - name++; +void put_hashmap(hashmap_t *map, char *key, void *val) +{ + int index = hash_index(map->size, key); + node_t *cur = map->buckets[index]; + + if (!cur) { + map->buckets[index] = new_node(key, val); + } else { + while (cur->next) + cur = cur->next; + cur->next = new_node(key, val); } + + /* TODO: Rehash if size exceeds size * load factor */ +} + +/** + * get_hashmap() - gets value from hashmap from given key. + * @map: The hashmap to be looked up. Must not be NULL. + * @key: The key string. Must not be NULL.
+ * + * @returns: The look up result, if the key-value pair entry + * exists, then returns its value's address, NULL otherwise. + */ +void *get_hashmap(hashmap_t *map, char *key) +{ + int index = hash_index(map->size, key); + node_t *cur = map->buckets[index]; + + while (cur) { + if (!strcmp(cur->key, key)) + return cur->val; /* the first matching node in the chain wins */ + + cur = cur->next; + } + + return NULL; +} + +/** + * contains_hashmap() - checks if the key-value pair entry exists + * from given key. + * @map: The hashmap to be looked up. Must not be NULL. + * @key: The key string. Must not be NULL. + * + * @returns: The look up result, if the key-value pair entry + * exists with a non-NULL value, then returns true, false otherwise. + */ +bool contains_hashmap(hashmap_t *map, char *key) +{ + return get_hashmap(map, key) != NULL; +} + +/** + * free_hashmap() - frees the hashmap, this also frees key-value pair + * entry's value. + * @map: The hashmap to be freed. Must not be NULL. + */ +void free_hashmap(hashmap_t *map) +{ + for (int i = 0; i < map->size; i++) { + node_t *cur = map->buckets[i], *next; + + while (cur) { + next = cur->next; + free(cur->key); + free(cur->val); + /* FIXME: Removing this if-clause causes a double free error; as written, the head node of bucket 0 is never freed */ + if (cur != map->buckets[0]) + free(cur); + cur = next; + } + } + + free(map->buckets); + free(map); } /* options */ @@ -318,12 +394,14 @@ int find_macro_param_src_idx(char *name, block_t *parent) func_t *add_func(char *name) { func_t *fn; - int index = insert_trie(FUNC_TRIES, name, funcs_idx); - if (index == funcs_idx) { - fn = &FUNCS[funcs_idx++]; + if (contains_hashmap(FUNCS_MAP, name)) { + fn = get_hashmap(FUNCS_MAP, name); + } else { + fn = malloc(sizeof(func_t)); /* NOTE(review): not zeroed; only return_def.var_name and stack_size are set in this function */ + put_hashmap(FUNCS_MAP, name, fn); strcpy(fn->return_def.var_name, name); } - fn = &FUNCS[index]; + fn->stack_size = 4; /* starting point of stack */ return fn; } @@ -358,10 +436,7 @@ constant_t *find_constant(char alias[]) func_t *find_func(char func_name[]) { - int index = find_trie(FUNC_TRIES, func_name); - if (index) -
return &FUNCS[index]; - return NULL; + return get_hashmap(FUNCS_MAP, func_name); } var_t *find_member(char token[], type_t *type) @@ -597,8 +672,7 @@ void global_init() BLOCKS.head = NULL; BLOCKS.tail = NULL; MACROS = malloc(MAX_ALIASES * sizeof(macro_t)); - FUNCS = malloc(MAX_FUNCS * sizeof(func_t)); - FUNC_TRIES = malloc(MAX_FUNC_TRIES * sizeof(trie_t)); + FUNCS_MAP = create_hashmap(MAX_FUNCS); TYPES = malloc(MAX_TYPES * sizeof(type_t)); GLOBAL_IR = malloc(MAX_GLOBAL_IR * sizeof(ph1_ir_t)); PH1_IR = malloc(MAX_IR_INSTR * sizeof(ph1_ir_t)); @@ -616,7 +690,8 @@ void global_init() elf_section = malloc(MAX_SECTION); /* set starting point of global stack manually */ - FUNCS[0].stack_size = 4; + func_t *global_func = add_func(""); + global_func->stack_size = 4; } void global_release() @@ -627,8 +702,7 @@ void global_release() BLOCKS.head = next; } free(MACROS); - free(FUNCS); - free(FUNC_TRIES); + free_hashmap(FUNCS_MAP); free(TYPES); free(GLOBAL_IR); free(PH1_IR);