Ruby 3.2.4p170 (2024-04-23 revision af471c0e0127eea0cafa6f308c0425bbfab0acf5)
st.c
1/* This is a public domain general purpose hash table package
2 originally written by Peter Moore @ UCB.
3
4 The hash table data structures were redesigned and the package was
5 rewritten by Vladimir Makarov <vmakarov@redhat.com>. */
6
7/* The original package implemented classic bucket-based hash tables
8 with entries doubly linked for an access by their insertion order.
9 To decrease pointer chasing and as a consequence to improve a data
10 locality the current implementation is based on storing entries in
11 an array and using hash tables with open addressing. The current
12 entries are more compact in comparison with the original ones and
13 this also improves the data locality.
14
15 The hash table has two arrays called *bins* and *entries*.
16
     bins:
    -------
   |       |                  entries array:
   |-------|            --------------------------------
   | index |           |     | entry:  |        |       |
   |-------|           |     |         |        |       |
   | ...   |           | ... | hash    |  ...   | ...   |
   |-------|           |     | key     |        |       |
   | empty |           |     | record  |        |       |
   |-------|            --------------------------------
   | ...   |                   ^                  ^
   |-------|                   |_ entries start   |_ entries bound
   |deleted|
    -------
31
32 o The entry array contains table entries in the same order as they
33 were inserted.
34
35 When the first entry is deleted, a variable containing index of
36 the current first entry (*entries start*) is changed. In all
37 other cases of the deletion, we just mark the entry as deleted by
38 using a reserved hash value.
39
40 Such organization of the entry storage makes operations of the
41 table shift and the entries traversal very fast.
42
43 o The bins provide access to the entries by their keys. The
44 key hash is mapped to a bin containing *index* of the
45 corresponding entry in the entry array.
46
47 The bin array size is always power of two, it makes mapping very
48 fast by using the corresponding lower bits of the hash.
     Generally it is not a good idea to ignore some part of the hash.
     But the alternative approach is worse.  For example, we could use
     a modulo operation for mapping and a prime number for the size of
     the bin array.  Unfortunately, the modulo operation for big
     64-bit numbers is extremely slow (it takes more than 100 cycles
     on modern Intel CPUs).
55
56 Still other bits of the hash value are used when the mapping
57 results in a collision. In this case we use a secondary hash
58 value which is a result of a function of the collision bin
59 index and the original hash value. The function choice
60 guarantees that we can traverse all bins and finally find the
61 corresponding bin as after several iterations the function
62 becomes a full cycle linear congruential generator because it
63 satisfies requirements of the Hull-Dobell theorem.
64
65 When an entry is removed from the table besides marking the
66 hash in the corresponding entry described above, we also mark
67 the bin by a special value in order to find entries which had
68 a collision with the removed entries.
69
70 There are two reserved values for the bins. One denotes an
71 empty bin, another one denotes a bin for a deleted entry.
72
   o The length of the bin array is at least two times more than the
     entry array length.  This keeps the table load factor healthy.
     The trigger of rebuilding the table is always a case when we can
     not insert an entry anymore at the entries bound.  We could
     change the entries bound too in case of deletion but then we
     would need special code to count bins with corresponding deleted
     entries and reset the bin values when there are too many bins
     corresponding to deleted entries.
81
82 Table rebuilding is done by creation of a new entry array and
83 bins of an appropriate size. We also try to reuse the arrays
84 in some cases by compacting the array and removing deleted
85 entries.
86
   o To save memory, very small tables have no bins array allocated.
     We use a linear search for an access by a key.
89
90 o To save more memory we use 8-, 16-, 32- and 64- bit indexes in
91 bins depending on the current hash table size.
92
93 o The implementation takes into account that the table can be
94 rebuilt during hashing or comparison functions. It can happen if
95 the functions are implemented in Ruby and a thread switch occurs
96 during their execution.
97
   This implementation speeds up the Ruby hash table benchmarks on
   average by more than 40% on Intel Haswell CPU.
100
101*/
102
103#ifdef NOT_RUBY
104#include "regint.h"
105#include "st.h"
106#else
107#include "internal.h"
108#include "internal/bits.h"
109#include "internal/hash.h"
110#include "internal/sanitizers.h"
111#endif
112
113#include <stdio.h>
114#ifdef HAVE_STDLIB_H
115#include <stdlib.h>
116#endif
117#include <string.h>
118#include <assert.h>
119
120#ifdef __GNUC__
121#define PREFETCH(addr, write_p) __builtin_prefetch(addr, write_p)
122#define EXPECT(expr, val) __builtin_expect(expr, val)
123#define ATTRIBUTE_UNUSED __attribute__((unused))
124#else
125#define PREFETCH(addr, write_p)
126#define EXPECT(expr, val) (expr)
127#define ATTRIBUTE_UNUSED
128#endif
129
130/* The type of hashes. */
131typedef st_index_t st_hash_t;
132
134 st_hash_t hash;
135 st_data_t key;
136 st_data_t record;
137};
138
139#define type_numhash st_hashtype_num
140static const struct st_hash_type st_hashtype_num = {
141 st_numcmp,
142 st_numhash,
143};
144
145static int st_strcmp(st_data_t, st_data_t);
146static st_index_t strhash(st_data_t);
147static const struct st_hash_type type_strhash = {
148 st_strcmp,
149 strhash,
150};
151
152static int st_locale_insensitive_strcasecmp_i(st_data_t lhs, st_data_t rhs);
153static st_index_t strcasehash(st_data_t);
154static const struct st_hash_type type_strcasehash = {
155 st_locale_insensitive_strcasecmp_i,
156 strcasehash,
157};
158
/* Value used to catch uninitialized entries/bins during debugging.
   There is a possibility for a false alarm, but its probability is
   extremely small.

   ST_INIT_VAL_BYTE is the single byte ST_INIT_VAL is made of, so
   that memory filled with it by memset reads back as ST_INIT_VAL.
   It has to fit into an unsigned char: memset converts its fill
   value to unsigned char, so a wider constant would be truncated to
   a different byte.  */
#define ST_INIT_VAL 0xafafafafafafafaf
#define ST_INIT_VAL_BYTE 0xaf
164
165#ifdef RUBY
166#undef malloc
167#undef realloc
168#undef calloc
169#undef free
170#define malloc ruby_xmalloc
171#define calloc ruby_xcalloc
172#define realloc ruby_xrealloc
173#define free ruby_xfree
174#endif
175
/* Key equality for table TAB: identical values are equal without the
   comparison function of the table type being called; otherwise the
   keys are equal when that function returns zero.  */
#define EQUAL(tab,x,y) \
    ((x) == (y) || (*(tab)->type->compare)((x),(y)) == 0)

/* Non-zero when the entry pointed to by PTR has hash HASH_VAL and a
   key equal to KEY_.  The hashes are compared first, so EQUAL is
   evaluated only for entries with the same hash.  */
#define PTR_EQUAL(tab, ptr, hash_val, key_) \
    ((ptr)->hash == (hash_val) && EQUAL((tab), (key_), (ptr)->key))

/* Store the result of PTR_EQUAL in RES.  REBUILT_P is set to non-zero
   when the rebuilds counter of TAB changed while the comparison was
   evaluated, and to zero otherwise.  */
#define DO_PTR_EQUAL_CHECK(tab, ptr, hash_val, key_, res, rebuilt_p) \
    do { \
        unsigned int rebuilds_before_check_ = (tab)->rebuilds_num; \
        (res) = PTR_EQUAL(tab, ptr, hash_val, key_); \
        (rebuilt_p) = rebuilds_before_check_ != (tab)->rebuilds_num; \
    } while (0)
188
189/* Features of a table. */
191 /* Power of 2 used for number of allocated entries. */
192 unsigned char entry_power;
193 /* Power of 2 used for number of allocated bins. Depending on the
194 table size, the number of bins is 2-4 times more than the
195 number of entries. */
196 unsigned char bin_power;
197 /* Enumeration of sizes of bins (8-bit, 16-bit etc). */
198 unsigned char size_ind;
199 /* Bins are packed in words of type st_index_t. The following is
200 a size of bins counted by words. */
201 st_index_t bins_words;
202};
203
204/* Features of all possible size tables. */
205#if SIZEOF_ST_INDEX_T == 8
206#define MAX_POWER2 62
207static const struct st_features features[] = {
208 {0, 1, 0, 0x0},
209 {1, 2, 0, 0x1},
210 {2, 3, 0, 0x1},
211 {3, 4, 0, 0x2},
212 {4, 5, 0, 0x4},
213 {5, 6, 0, 0x8},
214 {6, 7, 0, 0x10},
215 {7, 8, 0, 0x20},
216 {8, 9, 1, 0x80},
217 {9, 10, 1, 0x100},
218 {10, 11, 1, 0x200},
219 {11, 12, 1, 0x400},
220 {12, 13, 1, 0x800},
221 {13, 14, 1, 0x1000},
222 {14, 15, 1, 0x2000},
223 {15, 16, 1, 0x4000},
224 {16, 17, 2, 0x10000},
225 {17, 18, 2, 0x20000},
226 {18, 19, 2, 0x40000},
227 {19, 20, 2, 0x80000},
228 {20, 21, 2, 0x100000},
229 {21, 22, 2, 0x200000},
230 {22, 23, 2, 0x400000},
231 {23, 24, 2, 0x800000},
232 {24, 25, 2, 0x1000000},
233 {25, 26, 2, 0x2000000},
234 {26, 27, 2, 0x4000000},
235 {27, 28, 2, 0x8000000},
236 {28, 29, 2, 0x10000000},
237 {29, 30, 2, 0x20000000},
238 {30, 31, 2, 0x40000000},
239 {31, 32, 2, 0x80000000},
240 {32, 33, 3, 0x200000000},
241 {33, 34, 3, 0x400000000},
242 {34, 35, 3, 0x800000000},
243 {35, 36, 3, 0x1000000000},
244 {36, 37, 3, 0x2000000000},
245 {37, 38, 3, 0x4000000000},
246 {38, 39, 3, 0x8000000000},
247 {39, 40, 3, 0x10000000000},
248 {40, 41, 3, 0x20000000000},
249 {41, 42, 3, 0x40000000000},
250 {42, 43, 3, 0x80000000000},
251 {43, 44, 3, 0x100000000000},
252 {44, 45, 3, 0x200000000000},
253 {45, 46, 3, 0x400000000000},
254 {46, 47, 3, 0x800000000000},
255 {47, 48, 3, 0x1000000000000},
256 {48, 49, 3, 0x2000000000000},
257 {49, 50, 3, 0x4000000000000},
258 {50, 51, 3, 0x8000000000000},
259 {51, 52, 3, 0x10000000000000},
260 {52, 53, 3, 0x20000000000000},
261 {53, 54, 3, 0x40000000000000},
262 {54, 55, 3, 0x80000000000000},
263 {55, 56, 3, 0x100000000000000},
264 {56, 57, 3, 0x200000000000000},
265 {57, 58, 3, 0x400000000000000},
266 {58, 59, 3, 0x800000000000000},
267 {59, 60, 3, 0x1000000000000000},
268 {60, 61, 3, 0x2000000000000000},
269 {61, 62, 3, 0x4000000000000000},
270 {62, 63, 3, 0x8000000000000000},
271};
272
273#else
274#define MAX_POWER2 30
275
276static const struct st_features features[] = {
277 {0, 1, 0, 0x1},
278 {1, 2, 0, 0x1},
279 {2, 3, 0, 0x2},
280 {3, 4, 0, 0x4},
281 {4, 5, 0, 0x8},
282 {5, 6, 0, 0x10},
283 {6, 7, 0, 0x20},
284 {7, 8, 0, 0x40},
285 {8, 9, 1, 0x100},
286 {9, 10, 1, 0x200},
287 {10, 11, 1, 0x400},
288 {11, 12, 1, 0x800},
289 {12, 13, 1, 0x1000},
290 {13, 14, 1, 0x2000},
291 {14, 15, 1, 0x4000},
292 {15, 16, 1, 0x8000},
293 {16, 17, 2, 0x20000},
294 {17, 18, 2, 0x40000},
295 {18, 19, 2, 0x80000},
296 {19, 20, 2, 0x100000},
297 {20, 21, 2, 0x200000},
298 {21, 22, 2, 0x400000},
299 {22, 23, 2, 0x800000},
300 {23, 24, 2, 0x1000000},
301 {24, 25, 2, 0x2000000},
302 {25, 26, 2, 0x4000000},
303 {26, 27, 2, 0x8000000},
304 {27, 28, 2, 0x10000000},
305 {28, 29, 2, 0x20000000},
306 {29, 30, 2, 0x40000000},
307 {30, 31, 2, 0x80000000},
308};
309
310#endif
311
312/* The reserved hash value and its substitution. */
313#define RESERVED_HASH_VAL (~(st_hash_t) 0)
314#define RESERVED_HASH_SUBSTITUTION_VAL ((st_hash_t) 0)
315
316/* Return hash value of KEY for table TAB. */
317static inline st_hash_t
318do_hash(st_data_t key, st_table *tab)
319{
320 st_hash_t hash = (st_hash_t)(tab->type->hash)(key);
321
322 /* RESERVED_HASH_VAL is used for a deleted entry. Map it into
323 another value. Such mapping should be extremely rare. */
324 return hash == RESERVED_HASH_VAL ? RESERVED_HASH_SUBSTITUTION_VAL : hash;
325}
326
327/* Power of 2 defining the minimal number of allocated entries. */
328#define MINIMAL_POWER2 2
329
330#if MINIMAL_POWER2 < 2
331#error "MINIMAL_POWER2 should be >= 2"
332#endif
333
/* If the power2 of the allocated `entries` does not exceed the
   following value, don't allocate bins and use a linear search.  */
336#define MAX_POWER2_FOR_TABLES_WITHOUT_BINS 4
337
338/* Return smallest n >= MINIMAL_POWER2 such 2^n > SIZE. */
339static int
340get_power2(st_index_t size)
341{
342 unsigned int n = ST_INDEX_BITS - nlz_intptr(size);
343 if (n <= MAX_POWER2)
344 return n < MINIMAL_POWER2 ? MINIMAL_POWER2 : n;
345#ifndef NOT_RUBY
346 /* Ran out of the table entries */
347 rb_raise(rb_eRuntimeError, "st_table too big");
348#endif
349 /* should raise exception */
350 return -1;
351}
352
353/* Return value of N-th bin in array BINS of table with bins size
354 index S. */
355static inline st_index_t
356get_bin(st_index_t *bins, int s, st_index_t n)
357{
358 return (s == 0 ? ((unsigned char *) bins)[n]
359 : s == 1 ? ((unsigned short *) bins)[n]
360 : s == 2 ? ((unsigned int *) bins)[n]
361 : ((st_index_t *) bins)[n]);
362}
363
364/* Set up N-th bin in array BINS of table with bins size index S to
365 value V. */
366static inline void
367set_bin(st_index_t *bins, int s, st_index_t n, st_index_t v)
368{
369 if (s == 0) ((unsigned char *) bins)[n] = (unsigned char) v;
370 else if (s == 1) ((unsigned short *) bins)[n] = (unsigned short) v;
371 else if (s == 2) ((unsigned int *) bins)[n] = (unsigned int) v;
372 else ((st_index_t *) bins)[n] = v;
373}
374
375/* These macros define reserved values for empty table bin and table
376 bin which contains a deleted entry. We will never use such values
377 for an entry index in bins. */
378#define EMPTY_BIN 0
379#define DELETED_BIN 1
380/* Base of a real entry index in the bins. */
381#define ENTRY_BASE 2
382
383/* Mark I-th bin of table TAB as empty, in other words not
384 corresponding to any entry. */
385#define MARK_BIN_EMPTY(tab, i) (set_bin((tab)->bins, get_size_ind(tab), i, EMPTY_BIN))
386
387/* Values used for not found entry and bin with given
388 characteristics. */
389#define UNDEFINED_ENTRY_IND (~(st_index_t) 0)
390#define UNDEFINED_BIN_IND (~(st_index_t) 0)
391
392/* Entry and bin values returned when we found a table rebuild during
393 the search. */
394#define REBUILT_TABLE_ENTRY_IND (~(st_index_t) 1)
395#define REBUILT_TABLE_BIN_IND (~(st_index_t) 1)
396
/* Mark I-th bin of table TAB as corresponding to a deleted table
   entry.  Only the bin value is changed; no counters of the table
   are updated here.  */
400#define MARK_BIN_DELETED(tab, i) \
401 do { \
402 set_bin((tab)->bins, get_size_ind(tab), i, DELETED_BIN); \
403 } while (0)
404
/* Macros to check whether bin value B denotes an empty bin or a bin
   corresponding to a deleted entry.  */
407#define EMPTY_BIN_P(b) ((b) == EMPTY_BIN)
408#define DELETED_BIN_P(b) ((b) == DELETED_BIN)
409#define EMPTY_OR_DELETED_BIN_P(b) ((b) <= DELETED_BIN)
410
411/* Macros to check empty bins and bins corresponding to deleted
412 entries. Bins are given by their index I in table TAB. */
413#define IND_EMPTY_BIN_P(tab, i) (EMPTY_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
414#define IND_DELETED_BIN_P(tab, i) (DELETED_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
415#define IND_EMPTY_OR_DELETED_BIN_P(tab, i) (EMPTY_OR_DELETED_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
416
417/* Macros for marking and checking deleted entries given by their
418 pointer E_PTR. */
419#define MARK_ENTRY_DELETED(e_ptr) ((e_ptr)->hash = RESERVED_HASH_VAL)
420#define DELETED_ENTRY_P(e_ptr) ((e_ptr)->hash == RESERVED_HASH_VAL)
421
422/* Return bin size index of table TAB. */
423static inline unsigned int
424get_size_ind(const st_table *tab)
425{
426 return tab->size_ind;
427}
428
429/* Return the number of allocated bins of table TAB. */
430static inline st_index_t
431get_bins_num(const st_table *tab)
432{
433 return ((st_index_t) 1)<<tab->bin_power;
434}
435
436/* Return mask for a bin index in table TAB. */
437static inline st_index_t
438bins_mask(const st_table *tab)
439{
440 return get_bins_num(tab) - 1;
441}
442
443/* Return the index of table TAB bin corresponding to
444 HASH_VALUE. */
445static inline st_index_t
446hash_bin(st_hash_t hash_value, st_table *tab)
447{
448 return hash_value & bins_mask(tab);
449}
450
451/* Return the number of allocated entries of table TAB. */
452static inline st_index_t
453get_allocated_entries(const st_table *tab)
454{
455 return ((st_index_t) 1)<<tab->entry_power;
456}
457
458/* Return size of the allocated bins of table TAB. */
459static inline st_index_t
460bins_size(const st_table *tab)
461{
462 return features[tab->entry_power].bins_words * sizeof (st_index_t);
463}
464
465/* Mark all bins of table TAB as empty. */
466static void
467initialize_bins(st_table *tab)
468{
469 memset(tab->bins, 0, bins_size(tab));
470}
471
472/* Make table TAB empty. */
473static void
474make_tab_empty(st_table *tab)
475{
476 tab->num_entries = 0;
477 tab->entries_start = tab->entries_bound = 0;
478 if (tab->bins != NULL)
479 initialize_bins(tab);
480}
481
482#ifdef HASH_LOG
483#ifdef HAVE_UNISTD_H
484#include <unistd.h>
485#endif
/* Counters of the search statistics kept when HASH_LOG is
   defined.  */
static struct {
    int all;
    int total;
    int num;
    int str;
    int strcase;
} collision;

/* Zero until the output of the statistics at the end of the program
   has been either registered or switched off.  */
static int init_st;
493
494/* Output overall number of table searches and collisions into a
495 temporary file. */
496static void
497stat_col(void)
498{
499 char fname[10+sizeof(long)*3];
500 FILE *f;
501 if (!collision.total) return;
502 f = fopen((snprintf(fname, sizeof(fname), "/tmp/col%ld", (long)getpid()), fname), "w");
503 if (f == NULL)
504 return;
505 fprintf(f, "collision: %d / %d (%6.2f)\n", collision.all, collision.total,
506 ((double)collision.all / (collision.total)) * 100);
507 fprintf(f, "num: %d, str: %d, strcase: %d\n", collision.num, collision.str, collision.strcase);
508 fclose(f);
509}
510#endif
511
512/* Create and return table with TYPE which can hold at least SIZE
513 entries. The real number of entries which the table can hold is
514 the nearest power of two for SIZE. */
515st_table *
516st_init_table_with_size(const struct st_hash_type *type, st_index_t size)
517{
518 st_table *tab;
519 int n;
520
521#ifdef HASH_LOG
522#if HASH_LOG+0 < 0
523 {
524 const char *e = getenv("ST_HASH_LOG");
525 if (!e || !*e) init_st = 1;
526 }
527#endif
528 if (init_st == 0) {
529 init_st = 1;
530 atexit(stat_col);
531 }
532#endif
533
534 n = get_power2(size);
535#ifndef RUBY
536 if (n < 0)
537 return NULL;
538#endif
539 tab = (st_table *) malloc(sizeof (st_table));
540#ifndef RUBY
541 if (tab == NULL)
542 return NULL;
543#endif
544 tab->type = type;
545 tab->entry_power = n;
546 tab->bin_power = features[n].bin_power;
547 tab->size_ind = features[n].size_ind;
548 if (n <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
549 tab->bins = NULL;
550 else {
551 tab->bins = (st_index_t *) malloc(bins_size(tab));
552#ifndef RUBY
553 if (tab->bins == NULL) {
554 free(tab);
555 return NULL;
556 }
557#endif
558 }
559 tab->entries = (st_table_entry *) malloc(get_allocated_entries(tab)
560 * sizeof(st_table_entry));
561#ifndef RUBY
562 if (tab->entries == NULL) {
563 st_free_table(tab);
564 return NULL;
565 }
566#endif
567 make_tab_empty(tab);
568 tab->rebuilds_num = 0;
569 return tab;
570}
571
572size_t
573st_table_size(const struct st_table *tbl)
574{
575 return tbl->num_entries;
576}
577
578/* Create and return table with TYPE which can hold a minimal number
579 of entries (see comments for get_power2). */
580st_table *
581st_init_table(const struct st_hash_type *type)
582{
583 return st_init_table_with_size(type, 0);
584}
585
586/* Create and return table which can hold a minimal number of
587 numbers. */
588st_table *
589st_init_numtable(void)
590{
591 return st_init_table(&type_numhash);
592}
593
594/* Create and return table which can hold SIZE numbers. */
595st_table *
596st_init_numtable_with_size(st_index_t size)
597{
598 return st_init_table_with_size(&type_numhash, size);
599}
600
601/* Create and return table which can hold a minimal number of
602 strings. */
603st_table *
604st_init_strtable(void)
605{
606 return st_init_table(&type_strhash);
607}
608
609/* Create and return table which can hold SIZE strings. */
610st_table *
611st_init_strtable_with_size(st_index_t size)
612{
613 return st_init_table_with_size(&type_strhash, size);
614}
615
616/* Create and return table which can hold a minimal number of strings
617 whose character case is ignored. */
618st_table *
619st_init_strcasetable(void)
620{
621 return st_init_table(&type_strcasehash);
622}
623
624/* Create and return table which can hold SIZE strings whose character
625 case is ignored. */
626st_table *
627st_init_strcasetable_with_size(st_index_t size)
628{
629 return st_init_table_with_size(&type_strcasehash, size);
630}
631
632/* Make table TAB empty. */
633void
634st_clear(st_table *tab)
635{
636 make_tab_empty(tab);
637 tab->rebuilds_num++;
638}
639
640/* Free table TAB space. */
641void
642st_free_table(st_table *tab)
643{
644 if (tab->bins != NULL)
645 free(tab->bins);
646 free(tab->entries);
647 free(tab);
648}
649
650/* Return byte size of memory allocated for table TAB. */
651size_t
652st_memsize(const st_table *tab)
653{
654 return(sizeof(st_table)
655 + (tab->bins == NULL ? 0 : bins_size(tab))
656 + get_allocated_entries(tab) * sizeof(st_table_entry));
657}
658
659static st_index_t
660find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key);
661
662static st_index_t
663find_table_bin_ind(st_table *tab, st_hash_t hash_value, st_data_t key);
664
665static st_index_t
666find_table_bin_ind_direct(st_table *table, st_hash_t hash_value, st_data_t key);
667
668static st_index_t
669find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value,
670 st_data_t key, st_index_t *bin_ind);
671
672#ifdef HASH_LOG
673static void
674count_collision(const struct st_hash_type *type)
675{
676 collision.all++;
677 if (type == &type_numhash) {
678 collision.num++;
679 }
680 else if (type == &type_strhash) {
681 collision.strcase++;
682 }
683 else if (type == &type_strcasehash) {
684 collision.str++;
685 }
686}
687
/* Statistics hooks of the search functions.  COLLISION expects a
   variable `tab` in scope.  With collision_check defined as 0
   neither hook counts anything.  Both arms of each conditional have
   type void: ISO C does not allow a conditional expression with only
   one void operand.  */
#define COLLISION (collision_check ? count_collision(tab->type) : (void)0)
#define FOUND_BIN (collision_check ? (void)collision.total++ : (void)0)
#define collision_check 0
691#else
692#define COLLISION
693#define FOUND_BIN
694#endif
695
696/* If the number of entries in the table is at least REBUILD_THRESHOLD
697 times less than the entry array length, decrease the table
698 size. */
699#define REBUILD_THRESHOLD 4
700
701#if REBUILD_THRESHOLD < 2
702#error "REBUILD_THRESHOLD should be >= 2"
703#endif
704
705static void rebuild_table_with(st_table *new_tab, st_table *tab);
706
707/* Rebuild table TAB. Rebuilding removes all deleted bins and entries
708 and can change size of the table entries and bins arrays.
709 Rebuilding is implemented by creation of a new table or by
710 compaction of the existing one. */
711static void
712rebuild_table(st_table *tab)
713{
714 if ((2 * tab->num_entries <= get_allocated_entries(tab)
715 && REBUILD_THRESHOLD * tab->num_entries > get_allocated_entries(tab))
716 || tab->num_entries < (1 << MINIMAL_POWER2)) {
717 /* Compaction: */
718 tab->num_entries = 0;
719 if (tab->bins != NULL)
720 initialize_bins(tab);
721 rebuild_table_with(tab, tab);
722 }
723 else {
724 st_table *new_tab;
725 /* This allocation could trigger GC and compaction. If tab is the
726 * gen_iv_tbl, then tab could have changed in size due to objects being
727 * freed and/or moved. Do not store attributes of tab before this line. */
728 new_tab = st_init_table_with_size(tab->type,
729 2 * tab->num_entries - 1);
730 rebuild_table_with(new_tab, tab);
731 }
732}
733
734static void
735rebuild_table_with(st_table *new_tab, st_table *tab)
736{
737 st_index_t i, ni;
738 unsigned int size_ind;
739 st_table_entry *new_entries;
740 st_table_entry *curr_entry_ptr;
741 st_index_t *bins;
742 st_index_t bin_ind;
743
744 new_entries = new_tab->entries;
745
746 ni = 0;
747 bins = new_tab->bins;
748 size_ind = get_size_ind(new_tab);
749 st_index_t bound = tab->entries_bound;
750 st_table_entry *entries = tab->entries;
751
752 for (i = tab->entries_start; i < bound; i++) {
753 curr_entry_ptr = &entries[i];
754 PREFETCH(entries + i + 1, 0);
755 if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
756 continue;
757 if (&new_entries[ni] != curr_entry_ptr)
758 new_entries[ni] = *curr_entry_ptr;
759 if (EXPECT(bins != NULL, 1)) {
760 bin_ind = find_table_bin_ind_direct(new_tab, curr_entry_ptr->hash,
761 curr_entry_ptr->key);
762 set_bin(bins, size_ind, bin_ind, ni + ENTRY_BASE);
763 }
764 new_tab->num_entries++;
765 ni++;
766 }
767 if (new_tab != tab) {
768 tab->entry_power = new_tab->entry_power;
769 tab->bin_power = new_tab->bin_power;
770 tab->size_ind = new_tab->size_ind;
771 if (tab->bins != NULL)
772 free(tab->bins);
773 tab->bins = new_tab->bins;
774 free(tab->entries);
775 tab->entries = new_tab->entries;
776 free(new_tab);
777 }
778 tab->entries_start = 0;
779 tab->entries_bound = tab->num_entries;
780 tab->rebuilds_num++;
781}
782
783/* Return the next secondary hash index for table TAB using previous
784 index IND and PERTERB. Finally modulo of the function becomes a
785 full *cycle linear congruential generator*, in other words it
786 guarantees traversing all table bins in extreme case.
787
788 According the Hull-Dobell theorem a generator
789 "Xnext = (a*Xprev + c) mod m" is a full cycle generator if and only if
790 o m and c are relatively prime
791 o a-1 is divisible by all prime factors of m
792 o a-1 is divisible by 4 if m is divisible by 4.
793
794 For our case a is 5, c is 1, and m is a power of two. */
795static inline st_index_t
796secondary_hash(st_index_t ind, st_table *tab, st_index_t *perterb)
797{
798 *perterb >>= 11;
799 ind = (ind << 2) + ind + *perterb + 1;
800 return hash_bin(ind, tab);
801}
802
803/* Find an entry with HASH_VALUE and KEY in TABLE using a linear
804 search. Return the index of the found entry in array `entries`.
805 If it is not found, return UNDEFINED_ENTRY_IND. If the table was
806 rebuilt during the search, return REBUILT_TABLE_ENTRY_IND. */
807static inline st_index_t
808find_entry(st_table *tab, st_hash_t hash_value, st_data_t key)
809{
810 int eq_p, rebuilt_p;
811 st_index_t i, bound;
812 st_table_entry *entries;
813
814 bound = tab->entries_bound;
815 entries = tab->entries;
816 for (i = tab->entries_start; i < bound; i++) {
817 DO_PTR_EQUAL_CHECK(tab, &entries[i], hash_value, key, eq_p, rebuilt_p);
818 if (EXPECT(rebuilt_p, 0))
819 return REBUILT_TABLE_ENTRY_IND;
820 if (eq_p)
821 return i;
822 }
823 return UNDEFINED_ENTRY_IND;
824}
825
826/* Use the quadratic probing. The method has a better data locality
827 but more collisions than the current approach. In average it
828 results in a bit slower search. */
829/*#define QUADRATIC_PROBE*/
830
831/* Return index of entry with HASH_VALUE and KEY in table TAB. If
832 there is no such entry, return UNDEFINED_ENTRY_IND. If the table
833 was rebuilt during the search, return REBUILT_TABLE_ENTRY_IND. */
834static st_index_t
835find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key)
836{
837 int eq_p, rebuilt_p;
838 st_index_t ind;
839#ifdef QUADRATIC_PROBE
840 st_index_t d;
841#else
842 st_index_t peterb;
843#endif
844 st_index_t bin;
845 st_table_entry *entries = tab->entries;
846
847 ind = hash_bin(hash_value, tab);
848#ifdef QUADRATIC_PROBE
849 d = 1;
850#else
851 peterb = hash_value;
852#endif
853 FOUND_BIN;
854 for (;;) {
855 bin = get_bin(tab->bins, get_size_ind(tab), ind);
856 if (! EMPTY_OR_DELETED_BIN_P(bin)) {
857 DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
858 if (EXPECT(rebuilt_p, 0))
859 return REBUILT_TABLE_ENTRY_IND;
860 if (eq_p)
861 break;
862 }
863 else if (EMPTY_BIN_P(bin))
864 return UNDEFINED_ENTRY_IND;
865#ifdef QUADRATIC_PROBE
866 ind = hash_bin(ind + d, tab);
867 d++;
868#else
869 ind = secondary_hash(ind, tab, &peterb);
870#endif
871 COLLISION;
872 }
873 return bin;
874}
875
876/* Find and return index of table TAB bin corresponding to an entry
877 with HASH_VALUE and KEY. If there is no such bin, return
878 UNDEFINED_BIN_IND. If the table was rebuilt during the search,
879 return REBUILT_TABLE_BIN_IND. */
880static st_index_t
881find_table_bin_ind(st_table *tab, st_hash_t hash_value, st_data_t key)
882{
883 int eq_p, rebuilt_p;
884 st_index_t ind;
885#ifdef QUADRATIC_PROBE
886 st_index_t d;
887#else
888 st_index_t peterb;
889#endif
890 st_index_t bin;
891 st_table_entry *entries = tab->entries;
892
893 ind = hash_bin(hash_value, tab);
894#ifdef QUADRATIC_PROBE
895 d = 1;
896#else
897 peterb = hash_value;
898#endif
899 FOUND_BIN;
900 for (;;) {
901 bin = get_bin(tab->bins, get_size_ind(tab), ind);
902 if (! EMPTY_OR_DELETED_BIN_P(bin)) {
903 DO_PTR_EQUAL_CHECK(tab, &entries[bin - ENTRY_BASE], hash_value, key, eq_p, rebuilt_p);
904 if (EXPECT(rebuilt_p, 0))
905 return REBUILT_TABLE_BIN_IND;
906 if (eq_p)
907 break;
908 }
909 else if (EMPTY_BIN_P(bin))
910 return UNDEFINED_BIN_IND;
911#ifdef QUADRATIC_PROBE
912 ind = hash_bin(ind + d, tab);
913 d++;
914#else
915 ind = secondary_hash(ind, tab, &peterb);
916#endif
917 COLLISION;
918 }
919 return ind;
920}
921
922/* Find and return index of table TAB bin corresponding to an entry
923 with HASH_VALUE and KEY. The entry should be in the table
924 already. */
925static st_index_t
926find_table_bin_ind_direct(st_table *tab, st_hash_t hash_value, st_data_t key)
927{
928 st_index_t ind;
929#ifdef QUADRATIC_PROBE
930 st_index_t d;
931#else
932 st_index_t peterb;
933#endif
934 st_index_t bin;
935
936 ind = hash_bin(hash_value, tab);
937#ifdef QUADRATIC_PROBE
938 d = 1;
939#else
940 peterb = hash_value;
941#endif
942 FOUND_BIN;
943 for (;;) {
944 bin = get_bin(tab->bins, get_size_ind(tab), ind);
945 if (EMPTY_OR_DELETED_BIN_P(bin))
946 return ind;
947#ifdef QUADRATIC_PROBE
948 ind = hash_bin(ind + d, tab);
949 d++;
950#else
951 ind = secondary_hash(ind, tab, &peterb);
952#endif
953 COLLISION;
954 }
955}
956
957/* Return index of table TAB bin for HASH_VALUE and KEY through
958 BIN_IND and the pointed value as the function result. Reserve the
959 bin for inclusion of the corresponding entry into the table if it
960 is not there yet. We always find such bin as bins array length is
961 bigger entries array. Although we can reuse a deleted bin, the
962 result bin value is always empty if the table has no entry with
963 KEY. Return the entries array index of the found entry or
964 UNDEFINED_ENTRY_IND if it is not found. If the table was rebuilt
965 during the search, return REBUILT_TABLE_ENTRY_IND. */
966static st_index_t
967find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value,
968 st_data_t key, st_index_t *bin_ind)
969{
970 int eq_p, rebuilt_p;
971 st_index_t ind;
972 st_hash_t curr_hash_value = *hash_value;
973#ifdef QUADRATIC_PROBE
974 st_index_t d;
975#else
976 st_index_t peterb;
977#endif
978 st_index_t entry_index;
979 st_index_t first_deleted_bin_ind;
980 st_table_entry *entries;
981
982 ind = hash_bin(curr_hash_value, tab);
983#ifdef QUADRATIC_PROBE
984 d = 1;
985#else
986 peterb = curr_hash_value;
987#endif
988 FOUND_BIN;
989 first_deleted_bin_ind = UNDEFINED_BIN_IND;
990 entries = tab->entries;
991 for (;;) {
992 entry_index = get_bin(tab->bins, get_size_ind(tab), ind);
993 if (EMPTY_BIN_P(entry_index)) {
994 tab->num_entries++;
995 entry_index = UNDEFINED_ENTRY_IND;
996 if (first_deleted_bin_ind != UNDEFINED_BIN_IND) {
997 /* We can reuse bin of a deleted entry. */
998 ind = first_deleted_bin_ind;
999 MARK_BIN_EMPTY(tab, ind);
1000 }
1001 break;
1002 }
1003 else if (! DELETED_BIN_P(entry_index)) {
1004 DO_PTR_EQUAL_CHECK(tab, &entries[entry_index - ENTRY_BASE], curr_hash_value, key, eq_p, rebuilt_p);
1005 if (EXPECT(rebuilt_p, 0))
1006 return REBUILT_TABLE_ENTRY_IND;
1007 if (eq_p)
1008 break;
1009 }
1010 else if (first_deleted_bin_ind == UNDEFINED_BIN_IND)
1011 first_deleted_bin_ind = ind;
1012#ifdef QUADRATIC_PROBE
1013 ind = hash_bin(ind + d, tab);
1014 d++;
1015#else
1016 ind = secondary_hash(ind, tab, &peterb);
1017#endif
1018 COLLISION;
1019 }
1020 *bin_ind = ind;
1021 return entry_index;
1022}
1023
1024/* Find an entry with KEY in table TAB. Return non-zero if we found
1025 it. Set up *RECORD to the found entry record. */
1026int
1027st_lookup(st_table *tab, st_data_t key, st_data_t *value)
1028{
1029 st_index_t bin;
1030 st_hash_t hash = do_hash(key, tab);
1031
1032 retry:
1033 if (tab->bins == NULL) {
1034 bin = find_entry(tab, hash, key);
1035 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1036 goto retry;
1037 if (bin == UNDEFINED_ENTRY_IND)
1038 return 0;
1039 }
1040 else {
1041 bin = find_table_entry_ind(tab, hash, key);
1042 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1043 goto retry;
1044 if (bin == UNDEFINED_ENTRY_IND)
1045 return 0;
1046 bin -= ENTRY_BASE;
1047 }
1048 if (value != 0)
1049 *value = tab->entries[bin].record;
1050 return 1;
1051}
1052
1053/* Find an entry with KEY in table TAB. Return non-zero if we found
1054 it. Set up *RESULT to the found table entry key. */
1055int
1056st_get_key(st_table *tab, st_data_t key, st_data_t *result)
1057{
1058 st_index_t bin;
1059 st_hash_t hash = do_hash(key, tab);
1060
1061 retry:
1062 if (tab->bins == NULL) {
1063 bin = find_entry(tab, hash, key);
1064 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1065 goto retry;
1066 if (bin == UNDEFINED_ENTRY_IND)
1067 return 0;
1068 }
1069 else {
1070 bin = find_table_entry_ind(tab, hash, key);
1071 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1072 goto retry;
1073 if (bin == UNDEFINED_ENTRY_IND)
1074 return 0;
1075 bin -= ENTRY_BASE;
1076 }
1077 if (result != 0)
1078 *result = tab->entries[bin].key;
1079 return 1;
1080}
1081
1082/* Check the table and rebuild it if it is necessary. */
1083static inline void
1084rebuild_table_if_necessary (st_table *tab)
1085{
1086 st_index_t bound = tab->entries_bound;
1087
1088 if (bound == get_allocated_entries(tab))
1089 rebuild_table(tab);
1090}
1091
1092/* Insert (KEY, VALUE) into table TAB and return zero. If there is
1093 already entry with KEY in the table, return nonzero and update
1094 the value of the found entry. */
1095int
1096st_insert(st_table *tab, st_data_t key, st_data_t value)
1097{
1098 st_table_entry *entry;
1099 st_index_t bin;
1100 st_index_t ind;
1101 st_hash_t hash_value;
1102 st_index_t bin_ind;
1103 int new_p;
1104
1105 hash_value = do_hash(key, tab);
1106 retry:
1107 rebuild_table_if_necessary(tab);
1108 if (tab->bins == NULL) {
1109 bin = find_entry(tab, hash_value, key);
1110 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1111 goto retry;
1112 new_p = bin == UNDEFINED_ENTRY_IND;
1113 if (new_p)
1114 tab->num_entries++;
1115 bin_ind = UNDEFINED_BIN_IND;
1116 }
1117 else {
1118 bin = find_table_bin_ptr_and_reserve(tab, &hash_value,
1119 key, &bin_ind);
1120 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1121 goto retry;
1122 new_p = bin == UNDEFINED_ENTRY_IND;
1123 bin -= ENTRY_BASE;
1124 }
1125 if (new_p) {
1126 ind = tab->entries_bound++;
1127 entry = &tab->entries[ind];
1128 entry->hash = hash_value;
1129 entry->key = key;
1130 entry->record = value;
1131 if (bin_ind != UNDEFINED_BIN_IND)
1132 set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
1133 return 0;
1134 }
1135 tab->entries[bin].record = value;
1136 return 1;
1137}
1138
1139/* Insert (KEY, VALUE, HASH) into table TAB. The table should not have
1140 entry with KEY before the insertion. */
1141static inline void
1142st_add_direct_with_hash(st_table *tab,
1143 st_data_t key, st_data_t value, st_hash_t hash)
1144{
1145 st_table_entry *entry;
1146 st_index_t ind;
1147 st_index_t bin_ind;
1148
1149 rebuild_table_if_necessary(tab);
1150 ind = tab->entries_bound++;
1151 entry = &tab->entries[ind];
1152 entry->hash = hash;
1153 entry->key = key;
1154 entry->record = value;
1155 tab->num_entries++;
1156 if (tab->bins != NULL) {
1157 bin_ind = find_table_bin_ind_direct(tab, hash, key);
1158 set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
1159 }
1160}
1161
1162void
1163rb_st_add_direct_with_hash(st_table *tab,
1164 st_data_t key, st_data_t value, st_hash_t hash)
1165{
1166 st_add_direct_with_hash(tab, key, value, hash);
1167}
1168
1169/* Insert (KEY, VALUE) into table TAB. The table should not have
1170 entry with KEY before the insertion. */
1171void
1172st_add_direct(st_table *tab, st_data_t key, st_data_t value)
1173{
1174 st_hash_t hash_value;
1175
1176 hash_value = do_hash(key, tab);
1177 st_add_direct_with_hash(tab, key, value, hash_value);
1178}
1179
1180/* Insert (FUNC(KEY), VALUE) into table TAB and return zero. If
1181 there is already entry with KEY in the table, return nonzero and
1182 update the value of the found entry. */
1183int
1184st_insert2(st_table *tab, st_data_t key, st_data_t value,
1185 st_data_t (*func)(st_data_t))
1186{
1187 st_table_entry *entry;
1188 st_index_t bin;
1189 st_index_t ind;
1190 st_hash_t hash_value;
1191 st_index_t bin_ind;
1192 int new_p;
1193
1194 hash_value = do_hash(key, tab);
1195 retry:
1196 rebuild_table_if_necessary (tab);
1197 if (tab->bins == NULL) {
1198 bin = find_entry(tab, hash_value, key);
1199 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1200 goto retry;
1201 new_p = bin == UNDEFINED_ENTRY_IND;
1202 if (new_p)
1203 tab->num_entries++;
1204 bin_ind = UNDEFINED_BIN_IND;
1205 }
1206 else {
1207 bin = find_table_bin_ptr_and_reserve(tab, &hash_value,
1208 key, &bin_ind);
1209 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1210 goto retry;
1211 new_p = bin == UNDEFINED_ENTRY_IND;
1212 bin -= ENTRY_BASE;
1213 }
1214 if (new_p) {
1215 key = (*func)(key);
1216 ind = tab->entries_bound++;
1217 entry = &tab->entries[ind];
1218 entry->hash = hash_value;
1219 entry->key = key;
1220 entry->record = value;
1221 if (bin_ind != UNDEFINED_BIN_IND)
1222 set_bin(tab->bins, get_size_ind(tab), bin_ind, ind + ENTRY_BASE);
1223 return 0;
1224 }
1225 tab->entries[bin].record = value;
1226 return 1;
1227}
1228
1229/* Create and return a copy of table OLD_TAB. */
1230st_table *
1231st_copy(st_table *old_tab)
1232{
1233 st_table *new_tab;
1234
1235 new_tab = (st_table *) malloc(sizeof(st_table));
1236#ifndef RUBY
1237 if (new_tab == NULL)
1238 return NULL;
1239#endif
1240 *new_tab = *old_tab;
1241 if (old_tab->bins == NULL)
1242 new_tab->bins = NULL;
1243 else {
1244 new_tab->bins = (st_index_t *) malloc(bins_size(old_tab));
1245#ifndef RUBY
1246 if (new_tab->bins == NULL) {
1247 free(new_tab);
1248 return NULL;
1249 }
1250#endif
1251 }
1252 new_tab->entries = (st_table_entry *) malloc(get_allocated_entries(old_tab)
1253 * sizeof(st_table_entry));
1254#ifndef RUBY
1255 if (new_tab->entries == NULL) {
1256 st_free_table(new_tab);
1257 return NULL;
1258 }
1259#endif
1260 MEMCPY(new_tab->entries, old_tab->entries, st_table_entry,
1261 get_allocated_entries(old_tab));
1262 if (old_tab->bins != NULL)
1263 MEMCPY(new_tab->bins, old_tab->bins, char, bins_size(old_tab));
1264 return new_tab;
1265}
1266
1267/* Update the entries start of table TAB after removing an entry
1268 with index N in the array entries. */
1269static inline void
1270update_range_for_deleted(st_table *tab, st_index_t n)
1271{
1272 /* Do not update entries_bound here. Otherwise, we can fill all
1273 bins by deleted entry value before rebuilding the table. */
1274 if (tab->entries_start == n) {
1275 st_index_t start = n + 1;
1276 st_index_t bound = tab->entries_bound;
1277 st_table_entry *entries = tab->entries;
1278 while (start < bound && DELETED_ENTRY_P(&entries[start])) start++;
1279 tab->entries_start = start;
1280 }
1281}
1282
1283/* Delete entry with KEY from table TAB, set up *VALUE (unless
1284 VALUE is zero) from deleted table entry, and return non-zero. If
1285 there is no entry with KEY in the table, clear *VALUE (unless VALUE
1286 is zero), and return zero. */
1287static int
1288st_general_delete(st_table *tab, st_data_t *key, st_data_t *value)
1289{
1290 st_table_entry *entry;
1291 st_index_t bin;
1292 st_index_t bin_ind;
1293 st_hash_t hash;
1294
1295 hash = do_hash(*key, tab);
1296 retry:
1297 if (tab->bins == NULL) {
1298 bin = find_entry(tab, hash, *key);
1299 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1300 goto retry;
1301 if (bin == UNDEFINED_ENTRY_IND) {
1302 if (value != 0) *value = 0;
1303 return 0;
1304 }
1305 }
1306 else {
1307 bin_ind = find_table_bin_ind(tab, hash, *key);
1308 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
1309 goto retry;
1310 if (bin_ind == UNDEFINED_BIN_IND) {
1311 if (value != 0) *value = 0;
1312 return 0;
1313 }
1314 bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
1315 MARK_BIN_DELETED(tab, bin_ind);
1316 }
1317 entry = &tab->entries[bin];
1318 *key = entry->key;
1319 if (value != 0) *value = entry->record;
1320 MARK_ENTRY_DELETED(entry);
1321 tab->num_entries--;
1322 update_range_for_deleted(tab, bin);
1323 return 1;
1324}
1325
1326int
1327st_delete(st_table *tab, st_data_t *key, st_data_t *value)
1328{
1329 return st_general_delete(tab, key, value);
1330}
1331
1332/* The function and other functions with suffix '_safe' or '_check'
1333 are originated from the previous implementation of the hash tables.
1334 It was necessary for correct deleting entries during traversing
1335 tables. The current implementation permits deletion during
1336 traversing without a specific way to do this. */
1337int
1338st_delete_safe(st_table *tab, st_data_t *key, st_data_t *value,
1339 st_data_t never ATTRIBUTE_UNUSED)
1340{
1341 return st_general_delete(tab, key, value);
1342}
1343
1344/* If table TAB is empty, clear *VALUE (unless VALUE is zero), and
1345 return zero. Otherwise, remove the first entry in the table.
1346 Return its key through KEY and its record through VALUE (unless
1347 VALUE is zero). */
1348int
1349st_shift(st_table *tab, st_data_t *key, st_data_t *value)
1350{
1351 st_index_t i, bound;
1352 st_index_t bin;
1353 st_table_entry *entries, *curr_entry_ptr;
1354 st_index_t bin_ind;
1355
1356 entries = tab->entries;
1357 bound = tab->entries_bound;
1358 for (i = tab->entries_start; i < bound; i++) {
1359 curr_entry_ptr = &entries[i];
1360 if (! DELETED_ENTRY_P(curr_entry_ptr)) {
1361 st_hash_t entry_hash = curr_entry_ptr->hash;
1362 st_data_t entry_key = curr_entry_ptr->key;
1363
1364 if (value != 0) *value = curr_entry_ptr->record;
1365 *key = entry_key;
1366 retry:
1367 if (tab->bins == NULL) {
1368 bin = find_entry(tab, entry_hash, entry_key);
1369 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0)) {
1370 entries = tab->entries;
1371 goto retry;
1372 }
1373 curr_entry_ptr = &entries[bin];
1374 }
1375 else {
1376 bin_ind = find_table_bin_ind(tab, entry_hash, entry_key);
1377 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0)) {
1378 entries = tab->entries;
1379 goto retry;
1380 }
1381 curr_entry_ptr = &entries[get_bin(tab->bins, get_size_ind(tab), bin_ind)
1382 - ENTRY_BASE];
1383 MARK_BIN_DELETED(tab, bin_ind);
1384 }
1385 MARK_ENTRY_DELETED(curr_entry_ptr);
1386 tab->num_entries--;
1387 update_range_for_deleted(tab, i);
1388 return 1;
1389 }
1390 }
1391 if (value != 0) *value = 0;
1392 return 0;
1393}
1394
1395/* See comments for function st_delete_safe. */
1396void
1397st_cleanup_safe(st_table *tab ATTRIBUTE_UNUSED,
1398 st_data_t never ATTRIBUTE_UNUSED)
1399{
1400}
1401
1402/* Find entry with KEY in table TAB, call FUNC with pointers to copies
1403 of the key and the value of the found entry, and non-zero as the
1404 3rd argument. If the entry is not found, call FUNC with a pointer
1405 to KEY, a pointer to zero, and a zero argument. If the call
1406 returns ST_CONTINUE, the table will have an entry with key and
1407 value returned by FUNC through the 1st and 2nd parameters. If the
1408 call of FUNC returns ST_DELETE, the table will not have entry with
1409 KEY. The function returns flag of that the entry with KEY was in
1410 the table before the call. */
1411int
1412st_update(st_table *tab, st_data_t key,
1413 st_update_callback_func *func, st_data_t arg)
1414{
1415 st_table_entry *entry = NULL; /* to avoid uninitialized value warning */
1416 st_index_t bin = 0; /* Ditto */
1417 st_table_entry *entries;
1418 st_index_t bin_ind;
1419 st_data_t value = 0, old_key;
1420 int retval, existing;
1421 st_hash_t hash = do_hash(key, tab);
1422
1423 retry:
1424 entries = tab->entries;
1425 if (tab->bins == NULL) {
1426 bin = find_entry(tab, hash, key);
1427 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1428 goto retry;
1429 existing = bin != UNDEFINED_ENTRY_IND;
1430 entry = &entries[bin];
1431 bin_ind = UNDEFINED_BIN_IND;
1432 }
1433 else {
1434 bin_ind = find_table_bin_ind(tab, hash, key);
1435 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
1436 goto retry;
1437 existing = bin_ind != UNDEFINED_BIN_IND;
1438 if (existing) {
1439 bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
1440 entry = &entries[bin];
1441 }
1442 }
1443 if (existing) {
1444 key = entry->key;
1445 value = entry->record;
1446 }
1447 old_key = key;
1448 retval = (*func)(&key, &value, arg, existing);
1449 switch (retval) {
1450 case ST_CONTINUE:
1451 if (! existing) {
1452 st_add_direct_with_hash(tab, key, value, hash);
1453 break;
1454 }
1455 if (old_key != key) {
1456 entry->key = key;
1457 }
1458 entry->record = value;
1459 break;
1460 case ST_DELETE:
1461 if (existing) {
1462 if (bin_ind != UNDEFINED_BIN_IND)
1463 MARK_BIN_DELETED(tab, bin_ind);
1464 MARK_ENTRY_DELETED(entry);
1465 tab->num_entries--;
1466 update_range_for_deleted(tab, bin);
1467 }
1468 break;
1469 }
1470 return existing;
1471}
1472
1473/* Traverse all entries in table TAB calling FUNC with current entry
1474 key and value and zero. If the call returns ST_STOP, stop
1475 traversing. If the call returns ST_DELETE, delete the current
1476 entry from the table. In case of ST_CHECK or ST_CONTINUE, continue
1477 traversing. The function returns zero unless an error is found.
1478 CHECK_P is flag of st_foreach_check call. The behavior is a bit
1479 different for ST_CHECK and when the current element is removed
1480 during traversing. */
1481static inline int
1482st_general_foreach(st_table *tab, st_foreach_check_callback_func *func, st_update_callback_func *replace, st_data_t arg,
1483 int check_p)
1484{
1485 st_index_t bin;
1486 st_index_t bin_ind;
1487 st_table_entry *entries, *curr_entry_ptr;
1488 enum st_retval retval;
1489 st_index_t i, rebuilds_num;
1490 st_hash_t hash;
1491 st_data_t key;
1492 int error_p, packed_p = tab->bins == NULL;
1493
1494 entries = tab->entries;
1495 /* The bound can change inside the loop even without rebuilding
1496 the table, e.g. by an entry insertion. */
1497 for (i = tab->entries_start; i < tab->entries_bound; i++) {
1498 curr_entry_ptr = &entries[i];
1499 if (EXPECT(DELETED_ENTRY_P(curr_entry_ptr), 0))
1500 continue;
1501 key = curr_entry_ptr->key;
1502 rebuilds_num = tab->rebuilds_num;
1503 hash = curr_entry_ptr->hash;
1504 retval = (*func)(key, curr_entry_ptr->record, arg, 0);
1505
1506 if (retval == ST_REPLACE && replace) {
1507 st_data_t value;
1508 value = curr_entry_ptr->record;
1509 retval = (*replace)(&key, &value, arg, TRUE);
1510 curr_entry_ptr->key = key;
1511 curr_entry_ptr->record = value;
1512 }
1513
1514 if (rebuilds_num != tab->rebuilds_num) {
1515 retry:
1516 entries = tab->entries;
1517 packed_p = tab->bins == NULL;
1518 if (packed_p) {
1519 i = find_entry(tab, hash, key);
1520 if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
1521 goto retry;
1522 error_p = i == UNDEFINED_ENTRY_IND;
1523 }
1524 else {
1525 i = find_table_entry_ind(tab, hash, key);
1526 if (EXPECT(i == REBUILT_TABLE_ENTRY_IND, 0))
1527 goto retry;
1528 error_p = i == UNDEFINED_ENTRY_IND;
1529 i -= ENTRY_BASE;
1530 }
1531 if (error_p && check_p) {
1532 /* call func with error notice */
1533 retval = (*func)(0, 0, arg, 1);
1534 return 1;
1535 }
1536 curr_entry_ptr = &entries[i];
1537 }
1538 switch (retval) {
1539 case ST_REPLACE:
1540 break;
1541 case ST_CONTINUE:
1542 break;
1543 case ST_CHECK:
1544 if (check_p)
1545 break;
1546 case ST_STOP:
1547 return 0;
1548 case ST_DELETE: {
1549 st_data_t key = curr_entry_ptr->key;
1550
1551 again:
1552 if (packed_p) {
1553 bin = find_entry(tab, hash, key);
1554 if (EXPECT(bin == REBUILT_TABLE_ENTRY_IND, 0))
1555 goto again;
1556 if (bin == UNDEFINED_ENTRY_IND)
1557 break;
1558 }
1559 else {
1560 bin_ind = find_table_bin_ind(tab, hash, key);
1561 if (EXPECT(bin_ind == REBUILT_TABLE_BIN_IND, 0))
1562 goto again;
1563 if (bin_ind == UNDEFINED_BIN_IND)
1564 break;
1565 bin = get_bin(tab->bins, get_size_ind(tab), bin_ind) - ENTRY_BASE;
1566 MARK_BIN_DELETED(tab, bin_ind);
1567 }
1568 curr_entry_ptr = &entries[bin];
1569 MARK_ENTRY_DELETED(curr_entry_ptr);
1570 tab->num_entries--;
1571 update_range_for_deleted(tab, bin);
1572 break;
1573 }
1574 }
1575 }
1576 return 0;
1577}
1578
1579int
1580st_foreach_with_replace(st_table *tab, st_foreach_check_callback_func *func, st_update_callback_func *replace, st_data_t arg)
1581{
1582 return st_general_foreach(tab, func, replace, arg, TRUE);
1583}
1584
1585struct functor {
1586 st_foreach_callback_func *func;
1587 st_data_t arg;
1588};
1589
1590static int
1591apply_functor(st_data_t k, st_data_t v, st_data_t d, int _)
1592{
1593 const struct functor *f = (void *)d;
1594 return f->func(k, v, f->arg);
1595}
1596
1597int
1598st_foreach(st_table *tab, st_foreach_callback_func *func, st_data_t arg)
1599{
1600 const struct functor f = { func, arg };
1601 return st_general_foreach(tab, apply_functor, 0, (st_data_t)&f, FALSE);
1602}
1603
1604/* See comments for function st_delete_safe. */
1605int
1606st_foreach_check(st_table *tab, st_foreach_check_callback_func *func, st_data_t arg,
1607 st_data_t never ATTRIBUTE_UNUSED)
1608{
1609 return st_general_foreach(tab, func, 0, arg, TRUE);
1610}
1611
1612/* Set up array KEYS by at most SIZE keys of head table TAB entries.
1613 Return the number of keys set up in array KEYS. */
1614static inline st_index_t
1615st_general_keys(st_table *tab, st_data_t *keys, st_index_t size)
1616{
1617 st_index_t i, bound;
1618 st_data_t key, *keys_start, *keys_end;
1619 st_table_entry *curr_entry_ptr, *entries = tab->entries;
1620
1621 bound = tab->entries_bound;
1622 keys_start = keys;
1623 keys_end = keys + size;
1624 for (i = tab->entries_start; i < bound; i++) {
1625 if (keys == keys_end)
1626 break;
1627 curr_entry_ptr = &entries[i];
1628 key = curr_entry_ptr->key;
1629 if (! DELETED_ENTRY_P(curr_entry_ptr))
1630 *keys++ = key;
1631 }
1632
1633 return keys - keys_start;
1634}
1635
1636st_index_t
1637st_keys(st_table *tab, st_data_t *keys, st_index_t size)
1638{
1639 return st_general_keys(tab, keys, size);
1640}
1641
1642/* See comments for function st_delete_safe. */
1643st_index_t
1644st_keys_check(st_table *tab, st_data_t *keys, st_index_t size,
1645 st_data_t never ATTRIBUTE_UNUSED)
1646{
1647 return st_general_keys(tab, keys, size);
1648}
1649
1650/* Set up array VALUES by at most SIZE values of head table TAB
1651 entries. Return the number of values set up in array VALUES. */
1652static inline st_index_t
1653st_general_values(st_table *tab, st_data_t *values, st_index_t size)
1654{
1655 st_index_t i, bound;
1656 st_data_t *values_start, *values_end;
1657 st_table_entry *curr_entry_ptr, *entries = tab->entries;
1658
1659 values_start = values;
1660 values_end = values + size;
1661 bound = tab->entries_bound;
1662 for (i = tab->entries_start; i < bound; i++) {
1663 if (values == values_end)
1664 break;
1665 curr_entry_ptr = &entries[i];
1666 if (! DELETED_ENTRY_P(curr_entry_ptr))
1667 *values++ = curr_entry_ptr->record;
1668 }
1669
1670 return values - values_start;
1671}
1672
1673st_index_t
1674st_values(st_table *tab, st_data_t *values, st_index_t size)
1675{
1676 return st_general_values(tab, values, size);
1677}
1678
1679/* See comments for function st_delete_safe. */
1680st_index_t
1681st_values_check(st_table *tab, st_data_t *values, st_index_t size,
1682 st_data_t never ATTRIBUTE_UNUSED)
1683{
1684 return st_general_values(tab, values, size);
1685}
1686
/* Initial value of the 32 bit FNV-1a hash; also used in this file as
   the seed of the string hash. */
#define FNV1_32A_INIT 0x811c9dc5

/* 32 bit magic FNV-1a prime */
#define FNV_32_PRIME 0x01000193

/* UNALIGNED_WORD_ACCESS is 1 on the architectures listed below and 0
   elsewhere, unless it was already defined before this point.
   __POWERPC__ added to accommodate Darwin case. */
#ifndef UNALIGNED_WORD_ACCESS
# if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
     defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || \
     defined(__powerpc64__) || defined(__POWERPC__) || defined(__aarch64__) || \
     defined(__mc68020__)
#  define UNALIGNED_WORD_ACCESS 1
# else
#  define UNALIGNED_WORD_ACCESS 0
# endif
#endif
1706
1707/* This hash function is quite simplified MurmurHash3
1708 * Simplification is legal, cause most of magic still happens in finalizator.
1709 * And finalizator is almost the same as in MurmurHash3 */
1710#define BIG_CONSTANT(x,y) ((st_index_t)(x)<<32|(st_index_t)(y))
1711#define ROTL(x,n) ((x)<<(n)|(x)>>(SIZEOF_ST_INDEX_T*CHAR_BIT-(n)))
1712
1713#if ST_INDEX_BITS <= 32
1714#define C1 (st_index_t)0xcc9e2d51
1715#define C2 (st_index_t)0x1b873593
1716#else
1717#define C1 BIG_CONSTANT(0x87c37b91,0x114253d5);
1718#define C2 BIG_CONSTANT(0x4cf5ad43,0x2745937f);
1719#endif
1720NO_SANITIZE("unsigned-integer-overflow", static inline st_index_t murmur_step(st_index_t h, st_index_t k));
1721NO_SANITIZE("unsigned-integer-overflow", static inline st_index_t murmur_finish(st_index_t h));
1722NO_SANITIZE("unsigned-integer-overflow", extern st_index_t st_hash(const void *ptr, size_t len, st_index_t h));
1723
1724static inline st_index_t
1725murmur_step(st_index_t h, st_index_t k)
1726{
1727#if ST_INDEX_BITS <= 32
1728#define r1 (17)
1729#define r2 (11)
1730#else
1731#define r1 (33)
1732#define r2 (24)
1733#endif
1734 k *= C1;
1735 h ^= ROTL(k, r1);
1736 h *= C2;
1737 h = ROTL(h, r2);
1738 return h;
1739}
1740#undef r1
1741#undef r2
1742
1743static inline st_index_t
1744murmur_finish(st_index_t h)
1745{
1746#if ST_INDEX_BITS <= 32
1747#define r1 (16)
1748#define r2 (13)
1749#define r3 (16)
1750 const st_index_t c1 = 0x85ebca6b;
1751 const st_index_t c2 = 0xc2b2ae35;
1752#else
1753/* values are taken from Mix13 on http://zimbry.blogspot.ru/2011/09/better-bit-mixing-improving-on.html */
1754#define r1 (30)
1755#define r2 (27)
1756#define r3 (31)
1757 const st_index_t c1 = BIG_CONSTANT(0xbf58476d,0x1ce4e5b9);
1758 const st_index_t c2 = BIG_CONSTANT(0x94d049bb,0x133111eb);
1759#endif
1760#if ST_INDEX_BITS > 64
1761 h ^= h >> 64;
1762 h *= c2;
1763 h ^= h >> 65;
1764#endif
1765 h ^= h >> r1;
1766 h *= c1;
1767 h ^= h >> r2;
1768 h *= c2;
1769 h ^= h >> r3;
1770 return h;
1771}
1772#undef r1
1773#undef r2
1774#undef r3
1775
1776st_index_t
1777st_hash(const void *ptr, size_t len, st_index_t h)
1778{
1779 const char *data = ptr;
1780 st_index_t t = 0;
1781 size_t l = len;
1782
1783#define data_at(n) (st_index_t)((unsigned char)data[(n)])
1784#define UNALIGNED_ADD_4 UNALIGNED_ADD(2); UNALIGNED_ADD(1); UNALIGNED_ADD(0)
1785#if SIZEOF_ST_INDEX_T > 4
1786#define UNALIGNED_ADD_8 UNALIGNED_ADD(6); UNALIGNED_ADD(5); UNALIGNED_ADD(4); UNALIGNED_ADD(3); UNALIGNED_ADD_4
1787#if SIZEOF_ST_INDEX_T > 8
1788#define UNALIGNED_ADD_16 UNALIGNED_ADD(14); UNALIGNED_ADD(13); UNALIGNED_ADD(12); UNALIGNED_ADD(11); \
1789 UNALIGNED_ADD(10); UNALIGNED_ADD(9); UNALIGNED_ADD(8); UNALIGNED_ADD(7); UNALIGNED_ADD_8
1790#define UNALIGNED_ADD_ALL UNALIGNED_ADD_16
1791#endif
1792#define UNALIGNED_ADD_ALL UNALIGNED_ADD_8
1793#else
1794#define UNALIGNED_ADD_ALL UNALIGNED_ADD_4
1795#endif
1796#undef SKIP_TAIL
1797 if (len >= sizeof(st_index_t)) {
1798#if !UNALIGNED_WORD_ACCESS
1799 int align = (int)((st_data_t)data % sizeof(st_index_t));
1800 if (align) {
1801 st_index_t d = 0;
1802 int sl, sr, pack;
1803
1804 switch (align) {
1805#ifdef WORDS_BIGENDIAN
1806# define UNALIGNED_ADD(n) case SIZEOF_ST_INDEX_T - (n) - 1: \
1807 t |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 2)
1808#else
1809# define UNALIGNED_ADD(n) case SIZEOF_ST_INDEX_T - (n) - 1: \
1810 t |= data_at(n) << CHAR_BIT*(n)
1811#endif
1812 UNALIGNED_ADD_ALL;
1813#undef UNALIGNED_ADD
1814 }
1815
1816#ifdef WORDS_BIGENDIAN
1817 t >>= (CHAR_BIT * align) - CHAR_BIT;
1818#else
1819 t <<= (CHAR_BIT * align);
1820#endif
1821
1822 data += sizeof(st_index_t)-align;
1823 len -= sizeof(st_index_t)-align;
1824
1825 sl = CHAR_BIT * (SIZEOF_ST_INDEX_T-align);
1826 sr = CHAR_BIT * align;
1827
1828 while (len >= sizeof(st_index_t)) {
1829 d = *(st_index_t *)data;
1830#ifdef WORDS_BIGENDIAN
1831 t = (t << sr) | (d >> sl);
1832#else
1833 t = (t >> sr) | (d << sl);
1834#endif
1835 h = murmur_step(h, t);
1836 t = d;
1837 data += sizeof(st_index_t);
1838 len -= sizeof(st_index_t);
1839 }
1840
1841 pack = len < (size_t)align ? (int)len : align;
1842 d = 0;
1843 switch (pack) {
1844#ifdef WORDS_BIGENDIAN
1845# define UNALIGNED_ADD(n) case (n) + 1: \
1846 d |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 1)
1847#else
1848# define UNALIGNED_ADD(n) case (n) + 1: \
1849 d |= data_at(n) << CHAR_BIT*(n)
1850#endif
1851 UNALIGNED_ADD_ALL;
1852#undef UNALIGNED_ADD
1853 }
1854#ifdef WORDS_BIGENDIAN
1855 t = (t << sr) | (d >> sl);
1856#else
1857 t = (t >> sr) | (d << sl);
1858#endif
1859
1860 if (len < (size_t)align) goto skip_tail;
1861# define SKIP_TAIL 1
1862 h = murmur_step(h, t);
1863 data += pack;
1864 len -= pack;
1865 }
1866 else
1867#endif
1868#ifdef HAVE_BUILTIN___BUILTIN_ASSUME_ALIGNED
1869#define aligned_data __builtin_assume_aligned(data, sizeof(st_index_t))
1870#else
1871#define aligned_data data
1872#endif
1873 {
1874 do {
1875 h = murmur_step(h, *(st_index_t *)aligned_data);
1876 data += sizeof(st_index_t);
1877 len -= sizeof(st_index_t);
1878 } while (len >= sizeof(st_index_t));
1879 }
1880 }
1881
1882 t = 0;
1883 switch (len) {
1884#if UNALIGNED_WORD_ACCESS && SIZEOF_ST_INDEX_T <= 8 && CHAR_BIT == 8
1885 /* in this case byteorder doesn't really matter */
1886#if SIZEOF_ST_INDEX_T > 4
1887 case 7: t |= data_at(6) << 48;
1888 case 6: t |= data_at(5) << 40;
1889 case 5: t |= data_at(4) << 32;
1890 case 4:
1891 t |= (st_index_t)*(uint32_t*)aligned_data;
1892 goto skip_tail;
1893# define SKIP_TAIL 1
1894#endif
1895 case 3: t |= data_at(2) << 16;
1896 case 2: t |= data_at(1) << 8;
1897 case 1: t |= data_at(0);
1898#else
1899#ifdef WORDS_BIGENDIAN
1900# define UNALIGNED_ADD(n) case (n) + 1: \
1901 t |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 1)
1902#else
1903# define UNALIGNED_ADD(n) case (n) + 1: \
1904 t |= data_at(n) << CHAR_BIT*(n)
1905#endif
1906 UNALIGNED_ADD_ALL;
1907#undef UNALIGNED_ADD
1908#endif
1909#ifdef SKIP_TAIL
1910 skip_tail:
1911#endif
1912 h ^= t; h -= ROTL(t, 7);
1913 h *= C2;
1914 }
1915 h ^= l;
1916#undef aligned_data
1917
1918 return murmur_finish(h);
1919}
1920
1921st_index_t
1922st_hash_uint32(st_index_t h, uint32_t i)
1923{
1924 return murmur_step(h, i);
1925}
1926
1927NO_SANITIZE("unsigned-integer-overflow", extern st_index_t st_hash_uint(st_index_t h, st_index_t i));
1928st_index_t
1929st_hash_uint(st_index_t h, st_index_t i)
1930{
1931 i += h;
1932/* no matter if it is BigEndian or LittleEndian,
1933 * we hash just integers */
1934#if SIZEOF_ST_INDEX_T*CHAR_BIT > 8*8
1935 h = murmur_step(h, i >> 8*8);
1936#endif
1937 h = murmur_step(h, i);
1938 return h;
1939}
1940
1941st_index_t
1942st_hash_end(st_index_t h)
1943{
1944 h = murmur_finish(h);
1945 return h;
1946}
1947
1948#undef st_hash_start
1949st_index_t
1950rb_st_hash_start(st_index_t h)
1951{
1952 return h;
1953}
1954
1955static st_index_t
1956strhash(st_data_t arg)
1957{
1958 register const char *string = (const char *)arg;
1959 return st_hash(string, strlen(string), FNV1_32A_INIT);
1960}
1961
/* Compare strings S1 and S2 ignoring the case of the ASCII letters
   'A'..'Z' only, independently of the locale.  Return 0 if they are
   equal, 1 if S1 is longer or its first differing character compares
   greater, and -1 otherwise.  Characters are compared as plain
   char. */
int
st_locale_insensitive_strcasecmp(const char *s1, const char *s2)
{
    for (;; s1++, s2++) {
        char a = *s1;
        char b = *s2;

        if (a == '\0' || b == '\0') {
            /* At least one string ended; the longer one is greater. */
            if (a != '\0') return 1;
            if (b != '\0') return -1;
            return 0;
        }
        if (a >= 'A' && a <= 'Z') a += 'a' - 'A';
        if (b >= 'A' && b <= 'Z') b += 'a' - 'A';
        if (a != b)
            return (a > b) ? 1 : -1;
    }
}
1985
/* Compare at most N characters of strings S1 and S2 ignoring the
   case of the ASCII letters 'A'..'Z' only, independently of the
   locale.  Return 0 if the compared parts are equal, 1 if S1 is
   longer or its first differing character compares greater, and -1
   otherwise.  Characters are compared as plain char. */
int
st_locale_insensitive_strncasecmp(const char *s1, const char *s2, size_t n)
{
    for (; n > 0; n--, s1++, s2++) {
        char a = *s1;
        char b = *s2;

        if (a == '\0' || b == '\0') {
            /* At least one string ended; the longer one is greater. */
            if (a != '\0') return 1;
            if (b != '\0') return -1;
            return 0;
        }
        if (a >= 'A' && a <= 'Z') a += 'a' - 'A';
        if (b >= 'A' && b <= 'Z') b += 'a' - 'A';
        if (a != b)
            return (a > b) ? 1 : -1;
    }
    return 0;
}
2011
2012static int
2013st_strcmp(st_data_t lhs, st_data_t rhs)
2014{
2015 const char *s1 = (char *)lhs;
2016 const char *s2 = (char *)rhs;
2017 return strcmp(s1, s2);
2018}
2019
2020static int
2021st_locale_insensitive_strcasecmp_i(st_data_t lhs, st_data_t rhs)
2022{
2023 const char *s1 = (char *)lhs;
2024 const char *s2 = (char *)rhs;
2025 return st_locale_insensitive_strcasecmp(s1, s2);
2026}
2027
2028NO_SANITIZE("unsigned-integer-overflow", PUREFUNC(static st_index_t strcasehash(st_data_t)));
2029static st_index_t
2030strcasehash(st_data_t arg)
2031{
2032 register const char *string = (const char *)arg;
2033 register st_index_t hval = FNV1_32A_INIT;
2034
2035 /*
2036 * FNV-1a hash each octet in the buffer
2037 */
2038 while (*string) {
2039 unsigned int c = (unsigned char)*string++;
2040 if ((unsigned int)(c - 'A') <= ('Z' - 'A')) c += 'a' - 'A';
2041 hval ^= c;
2042
2043 /* multiply by the 32 bit FNV magic prime mod 2^32 */
2044 hval *= FNV_32_PRIME;
2045 }
2046 return hval;
2047}
2048
2049int
2050st_numcmp(st_data_t x, st_data_t y)
2051{
2052 return x != y;
2053}
2054
2055st_index_t
2056st_numhash(st_data_t n)
2057{
2058 enum {s1 = 11, s2 = 3};
2059 return (st_index_t)((n>>s1|(n<<s2)) ^ (n>>s2));
2060}
2061
2062/* Expand TAB to be suitable for holding SIZ entries in total.
2063 Pre-existing entries remain not deleted inside of TAB, but its bins
2064 are cleared to expect future reconstruction. See rehash below. */
2065static void
2066st_expand_table(st_table *tab, st_index_t siz)
2067{
2068 st_table *tmp;
2069 st_index_t n;
2070
2071 if (siz <= get_allocated_entries(tab))
2072 return; /* enough room already */
2073
2074 tmp = st_init_table_with_size(tab->type, siz);
2075 n = get_allocated_entries(tab);
2076 MEMCPY(tmp->entries, tab->entries, st_table_entry, n);
2077 free(tab->entries);
2078 if (tab->bins != NULL)
2079 free(tab->bins);
2080 if (tmp->bins != NULL)
2081 free(tmp->bins);
2082 tab->entry_power = tmp->entry_power;
2083 tab->bin_power = tmp->bin_power;
2084 tab->size_ind = tmp->size_ind;
2085 tab->entries = tmp->entries;
2086 tab->bins = NULL;
2087 tab->rebuilds_num++;
2088 free(tmp);
2089}
2090
2091/* Rehash using linear search. Return TRUE if we found that the table
2092 was rebuilt. */
2093static int
2094st_rehash_linear(st_table *tab)
2095{
2096 int eq_p, rebuilt_p;
2097 st_index_t i, j;
2098 st_table_entry *p, *q;
2099 if (tab->bins) {
2100 free(tab->bins);
2101 tab->bins = NULL;
2102 }
2103 for (i = tab->entries_start; i < tab->entries_bound; i++) {
2104 p = &tab->entries[i];
2105 if (DELETED_ENTRY_P(p))
2106 continue;
2107 for (j = i + 1; j < tab->entries_bound; j++) {
2108 q = &tab->entries[j];
2109 if (DELETED_ENTRY_P(q))
2110 continue;
2111 DO_PTR_EQUAL_CHECK(tab, p, q->hash, q->key, eq_p, rebuilt_p);
2112 if (EXPECT(rebuilt_p, 0))
2113 return TRUE;
2114 if (eq_p) {
2115 *p = *q;
2116 MARK_ENTRY_DELETED(q);
2117 tab->num_entries--;
2118 update_range_for_deleted(tab, j);
2119 }
2120 }
2121 }
2122 return FALSE;
2123}
2124
2125/* Rehash using index. Return TRUE if we found that the table was
2126 rebuilt. */
2127static int
2128st_rehash_indexed(st_table *tab)
2129{
2130 int eq_p, rebuilt_p;
2131 st_index_t i;
2132 st_index_t const n = bins_size(tab);
2133 unsigned int const size_ind = get_size_ind(tab);
2134 st_index_t *bins = realloc(tab->bins, n);
2135 tab->bins = bins;
2136 initialize_bins(tab);
2137 for (i = tab->entries_start; i < tab->entries_bound; i++) {
2138 st_table_entry *p = &tab->entries[i];
2139 st_index_t ind;
2140#ifdef QUADRATIC_PROBE
2141 st_index_t d = 1;
2142#else
2143 st_index_t peterb = p->hash;
2144#endif
2145
2146 if (DELETED_ENTRY_P(p))
2147 continue;
2148
2149 ind = hash_bin(p->hash, tab);
2150 for (;;) {
2151 st_index_t bin = get_bin(bins, size_ind, ind);
2152 if (EMPTY_OR_DELETED_BIN_P(bin)) {
2153 /* ok, new room */
2154 set_bin(bins, size_ind, ind, i + ENTRY_BASE);
2155 break;
2156 }
2157 else {
2158 st_table_entry *q = &tab->entries[bin - ENTRY_BASE];
2159 DO_PTR_EQUAL_CHECK(tab, q, p->hash, p->key, eq_p, rebuilt_p);
2160 if (EXPECT(rebuilt_p, 0))
2161 return TRUE;
2162 if (eq_p) {
2163 /* duplicated key; delete it */
2164 q->record = p->record;
2165 MARK_ENTRY_DELETED(p);
2166 tab->num_entries--;
2167 update_range_for_deleted(tab, bin);
2168 break;
2169 }
2170 else {
2171 /* hash collision; skip it */
2172#ifdef QUADRATIC_PROBE
2173 ind = hash_bin(ind + d, tab);
2174 d++;
2175#else
2176 ind = secondary_hash(ind, tab, &peterb);
2177#endif
2178 }
2179 }
2180 }
2181 }
2182 return FALSE;
2183}
2184
2185/* Reconstruct TAB's bins according to TAB's entries. This function
2186 permits conflicting keys inside of entries. No errors are reported
2187 then. All but one of them are discarded silently. */
2188static void
2189st_rehash(st_table *tab)
2190{
2191 int rebuilt_p;
2192
2193 do {
2194 if (tab->bin_power <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
2195 rebuilt_p = st_rehash_linear(tab);
2196 else
2197 rebuilt_p = st_rehash_indexed(tab);
2198 } while (rebuilt_p);
2199}
2200
2201#ifdef RUBY
2202static st_data_t
2203st_stringify(VALUE key)
2204{
2205 return (rb_obj_class(key) == rb_cString && !RB_OBJ_FROZEN(key)) ?
2206 rb_hash_key_str(key) : key;
2207}
2208
2209static void
2210st_insert_single(st_table *tab, VALUE hash, VALUE key, VALUE val)
2211{
2212 st_data_t k = st_stringify(key);
2214 e.hash = do_hash(k, tab);
2215 e.key = k;
2216 e.record = val;
2217
2218 tab->entries[tab->entries_bound++] = e;
2219 tab->num_entries++;
2220 RB_OBJ_WRITTEN(hash, Qundef, k);
2221 RB_OBJ_WRITTEN(hash, Qundef, val);
2222}
2223
2224static void
2225st_insert_linear(st_table *tab, long argc, const VALUE *argv, VALUE hash)
2226{
2227 long i;
2228
2229 for (i = 0; i < argc; /* */) {
2230 st_data_t k = st_stringify(argv[i++]);
2231 st_data_t v = argv[i++];
2232 st_insert(tab, k, v);
2233 RB_OBJ_WRITTEN(hash, Qundef, k);
2234 RB_OBJ_WRITTEN(hash, Qundef, v);
2235 }
2236}
2237
2238static void
2239st_insert_generic(st_table *tab, long argc, const VALUE *argv, VALUE hash)
2240{
2241 long i;
2242
2243 /* push elems */
2244 for (i = 0; i < argc; /* */) {
2245 VALUE key = argv[i++];
2246 VALUE val = argv[i++];
2247 st_insert_single(tab, hash, key, val);
2248 }
2249
2250 /* reindex */
2251 st_rehash(tab);
2252}
2253
2254/* Mimics ruby's { foo => bar } syntax. This function is subpart
2255 of rb_hash_bulk_insert. */
2256void
2257rb_hash_bulk_insert_into_st_table(long argc, const VALUE *argv, VALUE hash)
2258{
2259 st_index_t n, size = argc / 2;
2260 st_table *tab = RHASH_ST_TABLE(hash);
2261
2262 tab = RHASH_TBL_RAW(hash);
2263 n = tab->entries_bound + size;
2264 st_expand_table(tab, n);
2265 if (UNLIKELY(tab->num_entries))
2266 st_insert_generic(tab, argc, argv, hash);
2267 else if (argc <= 2)
2268 st_insert_single(tab, hash, argv[0], argv[1]);
2269 else if (tab->bin_power <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
2270 st_insert_linear(tab, argc, argv, hash);
2271 else
2272 st_insert_generic(tab, argc, argv, hash);
2273}
2274
2275// to iterate iv_index_tbl
2276st_data_t
2277rb_st_nth_key(st_table *tab, st_index_t index)
2278{
2279 if (LIKELY(tab->entries_start == 0 &&
2280 tab->num_entries == tab->entries_bound &&
2281 index < tab->num_entries)) {
2282 return tab->entries[index].key;
2283 }
2284 else {
2285 rb_bug("unreachable");
2286 }
2287}
2288
2289void
2290rb_st_compact_table(st_table *tab)
2291{
2292 st_index_t num = tab->num_entries;
2293 if (REBUILD_THRESHOLD * num <= get_allocated_entries(tab)) {
2294 /* Compaction: */
2295 st_table *new_tab = st_init_table_with_size(tab->type, 2 * num);
2296 rebuild_table_with(new_tab, tab);
2297 }
2298}
2299
2300#endif
/* Cross-reference notes for identifiers used above.  These come from
   the generated documentation listing and are not part of the code.

   static bool RB_OBJ_FROZEN(VALUE obj)
       Checks if an object is frozen.
       Definition: fl_type.h:921
   #define Qundef
       Old name of RUBY_Qundef.
   void rb_raise(VALUE exc, const char *fmt,...)
       Exception entry point.
       Definition: error.c:3150
   void rb_bug(const char *fmt,...)
       Interpreter panic switch.
       Definition: error.c:794
   VALUE rb_eRuntimeError
       RuntimeError exception.
       Definition: error.c:1089
   VALUE rb_obj_class(VALUE obj)
       Queries the class of an object.
       Definition: object.c:190
   VALUE rb_cString
       String class.
       Definition: string.c:79
   #define RB_OBJ_WRITTEN(old, oldv, young)
       Identical to RB_OBJ_WRITE(), except it doesn't write any values,
       but only a WB declaration.
       Definition: rgengc.h:232
   #define MEMCPY(p1, p2, type, n)
       Handy macro to call memcpy.
       Definition: memory.h:366
   VALUE type(ANYARGS)
       ANYARGS-ed function type.
   #define _(args)
       This was a transition path from K&R to ANSI.
       Definition: stdarg.h:35
   (name not preserved)
       Definition: st.c:133
   (name not preserved)
       Definition: st.h:79
   uintptr_t VALUE
       Type that represents a Ruby object.
       Definition: value.h:40
 */