/* ----------------------------------------------------------------------- *
 *
 *   Copyright 1996-2018 The NASM Authors - All Rights Reserved
 *   See the file AUTHORS included with the NASM distribution for
 *   the specific copyright holders.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following
 *   conditions are met:
 *
 *   * Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 *   * Redistributions in binary form must reproduce the above
 *     copyright notice, this list of conditions and the following
 *     disclaimer in the documentation and/or other materials provided
 *     with the distribution.
 *
 *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
 *     CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
 *     INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 *     MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 *     DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 *     CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 *     NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 *     LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 *     HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 *     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 *     OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
 *     EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * ----------------------------------------------------------------------- */

/*
 * hashtbl.c
 *
 * Efficient dictionary hash table class.
 */

#include "compiler.h"

#include "nasm.h"
#include "hashtbl.h"

#define HASH_MAX_LOAD   2       /* Higher = more memory-efficient, slower */
#define HASH_INIT_SIZE  16      /* Initial size (power of 2, min 4) */

#define hash_calc(key,keylen)   crc64b(CRC64_INIT, (key), (keylen))
#define hash_calci(key,keylen)  crc64ib(CRC64_INIT, (key), (keylen))
#define hash_max_load(size)     ((size) * (HASH_MAX_LOAD - 1) / HASH_MAX_LOAD)
#define hash_expand(size)       ((size) << 1)
#define hash_mask(size)         ((size) - 1)
#define hash_pos(hash, mask)    ((hash) & (mask))
#define hash_inc(hash, mask)    ((((hash) >> 32) & (mask)) | 1) /* always odd */
#define hash_pos_next(pos, inc, mask)   (((pos) + (inc)) & (mask))
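
/*
 * The table is open-addressed with double hashing: hash_pos() takes the
 * low bits of the 64-bit CRC as the starting slot, and hash_inc()
 * derives the probe step from the high 32 bits, forced odd with "| 1".
 * Since the table size is a power of two, an odd step is coprime with
 * it, so successive hash_pos_next() calls visit every slot exactly once
 * before the sequence repeats.
 */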

static void hash_init(struct hash_table *head)
{
    head->size = HASH_INIT_SIZE;
    head->load = 0;
    head->max_load = hash_max_load(head->size);
    nasm_newn(head->table, head->size);
}

/*
 * Find an entry in a hash table. The key can be any binary object.
 *
 * On failure, if "insert" is non-NULL, store data in that structure
 * which can be used to insert that node using hash_add().
 * See hash_add() for constraints on the uses of the insert object.
 *
 * On success, return a pointer to the "data" element of the hash
 * structure.
 */
void **hash_findb(struct hash_table *head, const void *key,
                  size_t keylen, struct hash_insert *insert)
{
    struct hash_node *np = NULL;
    struct hash_node *tbl = head->table;
    uint64_t hash = hash_calc(key, keylen);
    size_t mask = hash_mask(head->size);
    size_t pos = hash_pos(hash, mask);
    size_t inc = hash_inc(hash, mask);

    if (likely(tbl)) {
        while ((np = &tbl[pos])->key) {
            if (hash == np->hash &&
                keylen == np->keylen &&
                !memcmp(key, np->key, keylen))
                return &np->data;
            pos = hash_pos_next(pos, inc, mask);
        }
    }

    /* Not found. Store info for insert if requested. */
    if (insert) {
        insert->node.hash = hash;
        insert->node.key = key;
        insert->node.keylen = keylen;
        insert->node.data = NULL;
        insert->head = head;
        insert->where = np;
    }
    return NULL;
}

/*
 * Same as hash_findb(), but for a C string.
 */
void **hash_find(struct hash_table *head, const char *key,
                 struct hash_insert *insert)
{
    return hash_findb(head, key, strlen(key)+1, insert);
}

/*
 * Same as hash_findb(), but for case-insensitive hashing.
 */
void **hash_findib(struct hash_table *head, const void *key, size_t keylen,
                   struct hash_insert *insert)
{
    struct hash_node *np = NULL;
    struct hash_node *tbl = head->table;
    uint64_t hash = hash_calci(key, keylen);
    size_t mask = hash_mask(head->size);
    size_t pos = hash_pos(hash, mask);
    size_t inc = hash_inc(hash, mask);

    if (likely(tbl)) {
        while ((np = &tbl[pos])->key) {
            if (hash == np->hash &&
                keylen == np->keylen &&
                !nasm_memicmp(key, np->key, keylen))
                return &np->data;
            pos = hash_pos_next(pos, inc, mask);
        }
    }

    /* Not found. Store info for insert if requested. */
    if (insert) {
        insert->node.hash = hash;
        insert->node.key = key;
        insert->node.keylen = keylen;
        insert->node.data = NULL;
        insert->head = head;
        insert->where = np;
    }
    return NULL;
}

/*
 * Same as hash_find(), but for case-insensitive hashing.
 */
void **hash_findi(struct hash_table *head, const char *key,
                  struct hash_insert *insert)
{
    return hash_findib(head, key, strlen(key)+1, insert);
}

/*
 * Insert node. Return a pointer to the "data" element of the newly
 * created hash node.
 *
 * The following constraints apply:
 * 1. A call to hash_add() invalidates all other outstanding hash_insert
 *    objects; attempting to use them causes a wild pointer reference.
 * 2. The key provided must exactly match the key passed to hash_find*(),
 *    but it does not have to point to the same storage address. The key
 *    buffer provided to this function must not be freed for the lifespan
 *    of the hash. NULL will use the same pointer that was passed to
 *    hash_find*().
 */
void **hash_add(struct hash_insert *insert, const void *key, void *data)
{
    struct hash_table *head = insert->head;
    struct hash_node *np = insert->where;

    if (unlikely(!np)) {
        hash_init(head);
        /* The hash table is empty, so we don't need to iterate here */
        np = &head->table[hash_pos(insert->node.hash, hash_mask(head->size))];
    }

    /*
     * Insert node. We can always do this, even if we need to
     * rebalance immediately after.
     */
    *np = insert->node;
    np->data = data;
    if (key)
        np->key = key;

    if (unlikely(++head->load > head->max_load)) {
        /* Need to expand the table */
        size_t newsize = hash_expand(head->size);
        struct hash_node *newtbl;
        size_t mask = hash_mask(newsize);
        struct hash_node *op, *xp;
        size_t i;

        nasm_newn(newtbl, newsize);

        /* Rebalance all the entries */
        for (i = 0, op = head->table; i < head->size; i++, op++) {
            if (op->key) {
                size_t pos = hash_pos(op->hash, mask);
                size_t inc = hash_inc(op->hash, mask);

                while ((xp = &newtbl[pos])->key)
                    pos = hash_pos_next(pos, inc, mask);

                *xp = *op;
                if (op == np)
                    np = xp;
            }
        }
        nasm_free(head->table);

        head->table = newtbl;
        head->size = newsize;
        head->max_load = hash_max_load(newsize);
    }

    return &np->data;
}
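
/*
 * Typical find-or-insert usage of the API above (an illustrative sketch
 * only; "symtab", "name" and "sym" are hypothetical caller-side names,
 * not part of this module):
 *
 *     static struct hash_table symtab;    // zeroed; hash_add() allocates
 *                                         // the actual table lazily
 *     struct hash_insert hi;
 *     void **datap = hash_find(&symtab, name, &hi);
 *
 *     if (datap) {
 *         sym = *datap;           // already present; reuse stored value
 *     } else {
 *         // not present; insert at the slot remembered in "hi".
 *         // "name" must remain allocated for the life of the table.
 *         hash_add(&hi, name, sym);
 *     }
 */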

/*
 * Iterate over all members of a hash table. For the first call, iter
 * should be as initialized by hash_iterator_init(). Returns a pointer
 * to a struct hash_node representing the current object, or NULL if we
 * have reached the end of the hash table.
 *
 * Calling hash_add() will invalidate the iterator.
 */
const struct hash_node *hash_iterate(struct hash_iterator *iter)
{
    const struct hash_table *head = iter->head;
    const struct hash_node *cp = iter->next;
    const struct hash_node *ep = head->table + head->size;

    /* For an empty table, cp == ep == NULL */
    while (cp < ep) {
        if (cp->key) {
            iter->next = cp+1;
            return cp;
        }
        cp++;
    }

    iter->next = head->table;
    return NULL;
}
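
/*
 * Typical iteration over all live entries (an illustrative sketch;
 * "symtab" and "visit" are hypothetical caller-side names):
 *
 *     struct hash_iterator it;
 *     const struct hash_node *np;
 *
 *     hash_for_each(&symtab, it, np)
 *         visit(np->key, np->data);
 *
 * which is roughly equivalent to initializing "it" with
 * hash_iterator_init() and calling hash_iterate() until it returns NULL.
 */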

/*
 * Free the hash itself. Doesn't free the data elements; use
 * hash_iterate() to do that first, if needed. This function is normally
 * used when the hash data entries are either freed separately or are
 * compound objects which can't be freed in a single operation.
 */
void hash_free(struct hash_table *head)
{
    void *p = head->table;
    memset(head, 0, sizeof *head);
    nasm_free(p);
}

/*
 * Frees the hash *and* all data elements. This is applicable only in
 * the case where the data element is a single allocation. If the
 * second argument is false, the key string is part of the data
 * allocation or belongs to an allocation which will be freed
 * separately; if it is true, the keys are also freed.
 */
void hash_free_all(struct hash_table *head, bool free_keys)
{
    struct hash_iterator it;
    const struct hash_node *np;

    hash_for_each(head, it, np) {
        if (np->data)
            nasm_free(np->data);
        if (free_keys && np->key)
            nasm_free((void *)np->key);
    }

    hash_free(head);
}