-
Notifications
You must be signed in to change notification settings - Fork 30
/
Copy pathhashtable.c
456 lines (402 loc) · 14.7 KB
/
hashtable.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
/* Yash: yet another shell */
/* hashtable.c: hashtable library */
/* (C) 2007-2012 magicant */
/* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>. */
#include "common.h"
#include "hashtable.h"
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include <wchar.h>
#include "util.h"
/* A hashtable is a mapping from keys to values.
* Keys and values are all of type (void *).
* NULL is allowed as a value, but not as a key.
* The capacity of a hashtable is always no less than one. */
/* The hashtable_T structure is defined as follows:
* struct hashtable_T {
* size_t capacity;
* size_t count;
* hashfunc_T *hashfunc;
* keycmp_T *keycmp;
* size_t emptyindex;
* size_t tailindex;
* size_t *indices;
* struct hash_entry *entries;
* }
* `capacity' is the size of array `entries'.
* `count' is the number of entries contained in the hashtable.
* `hashfunc' is a pointer to the hash function.
* `keycmp' is a pointer to the function that compares keys.
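* `emptyindex' is the index of the first entry in the list of entries that were
* freed by removal (the list is linked through `next'), or the null index if
* there is no such entry.
* `tailindex' is the index of the first unused entry at the tail of `entries';
* every entry at or beyond this index is unoccupied.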
* `indices' is a pointer to the bucket array.
* `entries' is a pointer to the array of entries.
*
* The collision resolution strategy used in this implementation is a kind of
* separate chaining, but it differs from normal chaining in that entries are
* stored in a single array (`entries'). An advantage over normal chaining,
* which stores entries in linked lists, is spatial locality: entries can be
* quickly referenced because they are collected in one array. Another advantage
* is that we don't have to call `malloc' or `free' each time an entry is added
* or removed. */
/* Uncomment the following line to make `ht_set' print statistics about the
 * hashtable to the standard error each time it is called. */
//#define DEBUG_HASH 1
/* DEBUG_PRINT_STATISTICS(ht) expands to a call to `print_statistics' when
 * DEBUG_HASH is enabled and to a no-op expression otherwise. */
#if DEBUG_HASH /* For debugging */
# define DEBUG_PRINT_STATISTICS(ht) (print_statistics(ht))
# include <stdio.h>
static void print_statistics(const hashtable_T *ht);
#else
# define DEBUG_PRINT_STATISTICS(ht) ((void) 0)
#endif
/* The null index: stored wherever an index is expected to mean "no entry"
 * (an empty bucket, the end of a chain of entries, or an empty free list). */
#define NOTHING ((size_t) -1)
/* hashtable entry */
struct hash_entry {
/* Index of the next entry in the same chain, or NOTHING at the end of the
 * chain. For an occupied entry the chain is that of its bucket; for an entry
 * freed by `ht_remove' it is the free list headed by `emptyindex'. */
size_t next;
/* The hash value of the key as returned by the hash function, kept so that
 * lookups and rehashing do not have to call the hash function again. */
hashval_T hash;
/* The key-value pair stored in this entry. */
kvpair_T kv;
};
/* An entry is occupied iff `.kv.key' is non-NULL.
 * When an entry is unoccupied, the values of the other members of the entry are
 * unspecified, except that an entry freed by `ht_remove' keeps the free list
 * link in `.next'. */
/* Sets up `ht' as an empty hashtable with room for `capacity' entries.
 * A `capacity' of zero is treated as one.
 * `hashfunc' is the function used to hash keys and `keycmp' is the function
 * used to compare two keys; both pointers are stored in the hashtable.
 * The bucket array and the entry array are newly allocated with `xmallocn'.
 * Returns `ht'. */
hashtable_T *ht_initwithcapacity(
        hashtable_T *ht, hashfunc_T *hashfunc, keycmp_T *keycmp,
        size_t capacity)
{
    const size_t cap = (capacity != 0) ? capacity : 1;

    ht->capacity = cap;
    ht->count = 0;
    ht->hashfunc = hashfunc;
    ht->keycmp = keycmp;
    ht->emptyindex = NOTHING;  /* no freed entries yet */
    ht->tailindex = 0;         /* the whole entry array is unused */

    size_t *buckets = xmallocn(cap, sizeof *buckets);
    struct hash_entry *slots = xmallocn(cap, sizeof *slots);

    /* Every bucket starts empty and every entry starts unoccupied. */
    for (size_t i = 0; i < cap; i++)
        buckets[i] = NOTHING;
    for (size_t i = 0; i < cap; i++)
        slots[i].kv.key = NULL;

    ht->indices = buckets;
    ht->entries = slots;
    return ht;
}
/* Changes the capacity of the specified hashtable to `newcapacity' and rebuilds
 * the bucket array.
 * The requested capacity is raised to one if it is zero, and then to the
 * number of entries currently in the hashtable if it is smaller than that, so
 * no entry is ever dropped.
 * New bucket and entry arrays are always allocated; the occupied entries are
 * copied to the head of the new entry array without gaps and the old arrays
 * are freed. Consequently the free list becomes empty and `tailindex' becomes
 * the number of entries copied.
 * Returns `ht'. */
/* A capacity should be an odd integer, preferably a prime number. */
hashtable_T *ht_setcapacity(hashtable_T *ht, size_t newcapacity)
{
    size_t cap = newcapacity;
    if (cap == 0)
        cap = 1;
    if (cap < ht->count)
        cap = ht->count;

    const size_t oldcap = ht->capacity;
    size_t *const oldbuckets = ht->indices;
    size_t *const buckets = xmallocn(cap, sizeof *buckets);
    struct hash_entry *const oldslots = ht->entries;
    struct hash_entry *const slots = xmallocn(cap, sizeof *slots);

    for (size_t i = 0; i < cap; i++) {
        buckets[i] = NOTHING;
        slots[i].kv.key = NULL;
    }

    /* Copy each occupied entry to the next free slot at the head of the new
     * array and push it onto the front of its new bucket chain. The stored
     * hash value is reused, so the hash function is not called here. */
    size_t used = 0;
    for (const struct hash_entry *src = oldslots, *const end = oldslots + oldcap;
            src != end; src++) {
        if (src->kv.key == NULL)
            continue;

        const size_t bucket = (size_t) src->hash % cap;
        struct hash_entry *dst = &slots[used];
        dst->next = buckets[bucket];
        dst->hash = src->hash;
        dst->kv = src->kv;
        buckets[bucket] = used;
        used++;
    }

    free(oldbuckets);
    free(oldslots);

    ht->capacity = cap;
    ht->emptyindex = NOTHING;
    ht->tailindex = used;
    ht->indices = buckets;
    ht->entries = slots;
    return ht;
}
/* Makes sure that the capacity of the hashtable is at least `capacity'.
 * Nothing is done if the current capacity is already large enough.
 * Otherwise the capacity is set, with `ht_setcapacity', to the largest of:
 *  - the requested capacity,
 *  - one and a half times the current capacity, and
 *  - the current capacity plus six,
 * so that repeated small requests do not cause a reallocation each time.
 * Returns `ht'. */
hashtable_T *ht_ensurecapacity(hashtable_T *ht, size_t capacity)
{
    const size_t current = ht->capacity;
    if (current >= capacity)
        return ht;

    const size_t grown = current + (current >> 1);
    const size_t minimum = current + 6;

    size_t target = capacity;
    if (target < grown)
        target = grown;
    if (target < minimum)
        target = minimum;
    return ht_setcapacity(ht, target);
}
/* Empties the hashtable without changing its capacity.
 * If `freer' is non-NULL, it is called once for the key-value pair of each
 * entry being removed; the order of the calls is unspecified.
 * If the hashtable contains no entries, this function returns immediately
 * without touching the table.
 * Returns `ht'. */
hashtable_T *ht_clear(hashtable_T *ht, void freer(kvpair_T kv))
{
    size_t *const buckets = ht->indices;
    struct hash_entry *const slots = ht->entries;

    if (ht->count == 0)
        return ht;

    const size_t cap = ht->capacity;
    for (size_t i = 0; i < cap; i++) {
        buckets[i] = NOTHING;

        struct hash_entry *slot = &slots[i];
        if (slot->kv.key == NULL)
            continue;
        if (freer != NULL)
            freer(slot->kv);
        slot->kv.key = NULL;  /* mark the entry as unoccupied */
    }

    ht->count = 0;
    ht->emptyindex = NOTHING;
    ht->tailindex = 0;
    return ht;
}
/* Returns the entry whose key is equal to the specified `key',
* or { NULL, NULL } if `key' is NULL or there is no such entry. */
kvpair_T ht_get(const hashtable_T *ht, const void *key)
{
if (key != NULL) {
hashval_T hash = ht->hashfunc(key);
size_t index = ht->indices[(size_t) hash % ht->capacity];
while (index != NOTHING) {
struct hash_entry *entry = &ht->entries[index];
if (entry->hash == hash && ht->keycmp(entry->kv.key, key) == 0)
return entry->kv;
index = entry->next;
}
}
return (kvpair_T) { NULL, NULL, };
}
/* Associates `value' with `key' in the hashtable.
 * If an entry with an equal key already exists, its key and value are both
 * replaced by the specified ones and the old key-value pair is returned.
 * Otherwise a new entry is added and { NULL, NULL } is returned.
 * The hashtable stores the `key' and `value' pointers themselves; nothing is
 * copied.
 * `key' must not be NULL. */
kvpair_T ht_set(hashtable_T *ht, const void *key, const void *value)
{
    assert(key != NULL);

    const hashval_T hash = ht->hashfunc(key);
    size_t bucket = (size_t) hash % ht->capacity;

    /* First, search the bucket chain for an entry to overwrite. */
    for (size_t i = ht->indices[bucket]; i != NOTHING; i = ht->entries[i].next) {
        struct hash_entry *existing = &ht->entries[i];
        if (existing->hash == hash
                && ht->keycmp(existing->kv.key, key) == 0) {
            kvpair_T replaced = existing->kv;
            existing->kv.key = (void *) key;
            existing->kv.value = (void *) value;
            DEBUG_PRINT_STATISTICS(ht);
            return replaced;
        }
    }

    /* Not found: pick a slot for the new entry. */
    size_t slot = ht->emptyindex;
    if (slot == NOTHING) {
        /* The free list is empty, so take the next unused slot at the tail.
         * Growing the table may change the capacity, so the bucket index has
         * to be computed again afterwards. */
        ht_ensurecapacity(ht, ht->count + 1);
        bucket = (size_t) hash % ht->capacity;
        slot = ht->tailindex++;
    } else {
        /* Reuse the first slot of the free list. */
        ht->emptyindex = ht->entries[slot].next;
    }

    /* Fill the slot and push it onto the front of the bucket chain. */
    struct hash_entry *fresh = &ht->entries[slot];
    fresh->next = ht->indices[bucket];
    fresh->hash = hash;
    fresh->kv.key = (void *) key;
    fresh->kv.value = (void *) value;
    ht->indices[bucket] = slot;
    ht->count++;
    DEBUG_PRINT_STATISTICS(ht);
    return (kvpair_T) { NULL, NULL, };
}
/* Removes the entry whose key compares equal to `key' and returns its
 * key-value pair.
 * Returns { NULL, NULL } if `key' is NULL or no entry matches.
 * The slot of the removed entry is put on the free list so that a later
 * `ht_set' can reuse it. */
kvpair_T ht_remove(hashtable_T *ht, const void *key)
{
    if (key == NULL)
        return (kvpair_T) { NULL, NULL, };

    const hashval_T hash = ht->hashfunc(key);

    /* `link' points to the place that holds the index of the entry being
     * examined: the bucket itself at first, then the `next' member of the
     * preceding entry. Unlinking is done by overwriting `*link'. */
    size_t *link = &ht->indices[(size_t) hash % ht->capacity];
    size_t slot;
    while ((slot = *link) != NOTHING) {
        struct hash_entry *victim = &ht->entries[slot];
        if (victim->hash != hash || ht->keycmp(victim->kv.key, key) != 0) {
            link = &victim->next;
            continue;
        }

        kvpair_T removed = victim->kv;
        *link = victim->next;              /* unlink from the bucket chain */
        victim->next = ht->emptyindex;     /* push onto the free list */
        ht->emptyindex = slot;
        victim->kv.key = NULL;             /* mark as unoccupied */
        ht->count--;
        return removed;
    }
    return (kvpair_T) { NULL, NULL, };
}
#if 0
/* Calls the function `f' once for each entry in the specified hashtable.
 * The order in which the entries are passed to `f' is unspecified.
 * If `f' returns a non-zero value for an entry, `f' is not called for the
 * remaining entries and `ht_each' returns that value immediately. If `f'
 * returns zero for every entry, `ht_each' returns zero.
 * Entries must not be added or removed inside `f'. */
int ht_each(const hashtable_T *ht, int f(kvpair_T kv))
{
    const struct hash_entry *slot = ht->entries;
    for (size_t left = ht->capacity; left > 0; left--, slot++) {
        kvpair_T kv = slot->kv;
        if (kv.key == NULL)
            continue;  /* unoccupied entry */

        int result = f(kv);
        if (result != 0)
            return result;
    }
    return 0;
}
#endif
/* Returns the next entry in an iteration over the specified hashtable.
 * `*indexp' holds the state of the iteration and must be set to zero before
 * the first call. Each call advances `*indexp' past the entries it has
 * examined and returns the key-value pair of one occupied entry.
 * Until the iteration finishes, the caller must not modify `*indexp' and must
 * not add or remove entries in the hashtable.
 * Each entry is returned exactly once, in an unspecified order.
 * When there are no more entries, { NULL, NULL } is returned. */
kvpair_T ht_next(const hashtable_T *restrict ht, size_t *restrict indexp)
{
    size_t i = *indexp;
    while (i < ht->capacity) {
        kvpair_T current = ht->entries[i++].kv;
        *indexp = i;  /* resume after this entry on the next call */
        if (current.key != NULL)
            return current;
    }
    return (kvpair_T) { NULL, NULL, };
}
/* Returns a newly malloced array of key-value pairs that contains all the
* elements of the specified hashtable.
* The returned array is terminated by the { NULL, NULL } element. */
kvpair_T *ht_tokvarray(const hashtable_T *ht)
{
kvpair_T *array = xmalloce(ht->count, 1, sizeof *array);
size_t index = 0;
for (size_t i = 0; i < ht->capacity; i++) {
if (ht->entries[i].kv.key != NULL)
array[index++] = ht->entries[i].kv;
}
assert(index == ht->count);
array[index] = (kvpair_T) { NULL, NULL, };
return array;
}
/* Hashes a byte string.
 * The argument must point to a null-terminated byte string (const char *);
 * the terminating null byte is not included in the hash.
 * `htstrcmp' can be used as the corresponding comparison function. */
hashval_T hashstr(const void *s)
{
    /* The algorithm is based on the FNV hash: for each byte, the accumulator
     * is xor-ed with the byte and then multiplied by FNVPRIME. Note that the
     * accumulator starts at zero here.
     * Cf. http://www.isthe.com/chongo/tech/comp/fnv/ */
    hashval_T hash = 0;
    for (const unsigned char *p = s; *p != '\0'; p++)
        hash = (hash ^ (hashval_T) *p) * FNVPRIME;
    return hash;
}
/* Hashes a wide string.
 * The argument must point to a null-terminated wide string (const wchar_t *);
 * the terminating null character is not included in the hash.
 * `htwcscmp' can be used as the corresponding comparison function. */
hashval_T hashwcs(const void *s)
{
    /* The algorithm is a slightly modified version of the FNV hash: it works
     * on whole wide characters rather than on bytes. For each character, the
     * accumulator is xor-ed with the character and then multiplied by
     * FNVPRIME. The accumulator starts at zero.
     * Cf. http://www.isthe.com/chongo/tech/comp/fnv/ */
    hashval_T hash = 0;
    for (const wchar_t *p = s; *p != L'\0'; p++)
        hash = (hash ^ (hashval_T) *p) * FNVPRIME;
    return hash;
}
/* Compares two wide strings with `wcscmp'.
 * Both arguments must point to null-terminated wide strings
 * (const wchar_t *). The result is that of `wcscmp': zero iff the strings are
 * equal.
 * `hashwcs' can be used as the corresponding hash function. */
int htwcscmp(const void *s1, const void *s2)
{
    const wchar_t *left = s1;
    const wchar_t *right = s2;
    return wcscmp(left, right);
}
/* A comparison function for key-value pairs with multibyte-string keys.
* The arguments are pointers to kvpair_T's (const kvpair_T *) whose keys are
* multibyte strings. */
int keystrcoll(const void *k1, const void *k2)
{
return strcoll(((const kvpair_T *) k1)->key, ((const kvpair_T *) k2)->key);
}
/* A comparison function for key-value pairs with wide-string keys.
* The arguments are pointers to kvpair_T's (const kvpair_T *) whose keys are
* wide strings. */
int keywcscoll(const void *k1, const void *k2)
{
return wcscoll(((const kvpair_T *) k1)->key, ((const kvpair_T *) k2)->key);
}
/* Releases the key of the specified key-value pair with `free'.
 * The value is left untouched.
 * This function can be passed to `ht_clear' as the freer function. */
void kfree(kvpair_T kv)
{
    void *key = kv.key;
    free(key);
}
/* Releases the value of the specified key-value pair with `free'.
 * The key is left untouched.
 * This function can be passed to `ht_clear' as the freer function. */
void vfree(kvpair_T kv)
{
    void *value = kv.value;
    free(value);
}
/* Releases both the key and the value of the specified key-value pair with
 * `free', the key first.
 * This function can be passed to `ht_clear' as the freer function. */
void kvfree(kvpair_T kv)
{
    void *key = kv.key;
    void *value = kv.value;
    free(key);
    free(value);
}
#if DEBUG_HASH
/* Prints statistics about the specified hashtable to the standard error.
 * The output shows the entry count, the capacity, `emptyindex', `tailindex',
 * the length of the free list ("empties") and the number of occupied entries
 * that are followed by another entry in their chain ("collisions").
 * This function is used in debugging only. */
void print_statistics(const hashtable_T *ht)
{
    unsigned freeslots = 0;
    unsigned chained = 0;
    size_t i;

    fprintf(stderr, "DEBUG: id=%p hash->count=%zu, capacity=%zu\n",
            (void *) ht, ht->count, ht->capacity);
    fprintf(stderr, "DEBUG: hash->emptyindex=%zu, tailindex=%zu\n",
            ht->emptyindex, ht->tailindex);

    /* Measure the free list by following it to its end. */
    i = ht->emptyindex;
    while (i != NOTHING) {
        freeslots++;
        i = ht->entries[i].next;
    }

    /* Count the occupied entries that have a successor. */
    for (i = 0; i < ht->capacity; i++) {
        const struct hash_entry *slot = &ht->entries[i];
        if (slot->kv.key && slot->next != NOTHING)
            chained++;
    }

    fprintf(stderr, "DEBUG: hash empties=%u collisions=%u\n\n",
            freeslots, chained);
}
#endif
/* vim: set ts=8 sts=4 sw=4 et tw=80: */