Fix duplicate key issues in nc_hashmap

The addition of the nc_hashmap to facilitate quick
retrieval of var and dim by name did not take into
account key collisions -- two or more names hashed
to the same value.  If the keys matched, it assumed
that the names matched also.

This change fixes this incorrect assumption and
checks both the key (which is the hash of the name)
and if the keys match, it also checks that the names
match.

While there have been no instances of duplicate keys,
they are certain to occur and cause difficult to
debug issues. This fix eliminates that defect.
This commit is contained in:
Greg Sjaardema 2016-04-05 09:04:30 -06:00
parent 928c94367b
commit 1f9eb8093c
6 changed files with 242 additions and 83 deletions

View File

@ -187,9 +187,6 @@ typedef struct NC_vararray {
/* Begin defined in lookup3.c */
extern uint32_t
hash_fast(const void *key, size_t length);
/* End defined in lookup3.c */
/* Begin defined in var.c */

View File

@ -1,24 +1,37 @@
#ifndef HASHMAP_H_INCLUDED
#define HASHMAP_H_INCLUDED
#include <stdlib.h>
typedef struct NC_vararray NC_vararray;
typedef struct NC_dimarray NC_dimarray;
/** Hashmap structure (forward declaration) */
struct s_hashmap;
typedef struct s_hashmap NC_hashmap;
/** Creates a new hashmap near the given size. */
extern NC_hashmap* NC_hashmapCreate(int startsize);
extern NC_hashmap* NC_hashmapCreate(unsigned long startsize);
/** Inserts a new element into the hashmap. */
extern void NC_hashmapInsert(NC_hashmap*, int data, unsigned long key);
extern void NC_hashmapAddDim(const NC_dimarray*, long data, const char *name);
/** Removes the storage for the element of the key and returns the element. */
extern int NC_hashmapRemove(NC_hashmap*, unsigned long key);
extern long NC_hashmapRemoveDim(const NC_dimarray*, const char *name);
/** Returns the element for the key. */
extern int NC_hashmapGet(NC_hashmap*, unsigned long key);
extern long NC_hashmapGetDim(const NC_dimarray*, const char *name);
/** Inserts a new element into the hashmap. */
extern void NC_hashmapAddVar(const NC_vararray*, long data, const char *name);
/** Removes the storage for the element of the key and returns the element. */
extern long NC_hashmapRemoveVar(const NC_vararray*, const char *name);
/** Returns the element for the key. */
extern long NC_hashmapGetVar(const NC_vararray*, const char *name);
/** Returns the number of saved elements. */
extern long NC_hashmapCount(NC_hashmap*);
extern unsigned long NC_hashmapCount(NC_hashmap*);
/** Removes the hashmap structure. */
extern void NC_hashmapDelete(NC_hashmap*);

View File

@ -126,7 +126,6 @@ NC_finddim(const NC_dimarray *ncap, const char *uname, NC_dim **dimpp)
{
int dimid;
uint32_t shash;
NC_dim ** loc;
char *name;
@ -143,9 +142,7 @@ NC_finddim(const NC_dimarray *ncap, const char *uname, NC_dim **dimpp)
name = (char *)utf8proc_NFC((const unsigned char *)uname);
if(name == NULL)
return NC_ENOMEM;
shash = hash_fast(name, strlen(name));
shash = hash_fast(name, strlen(name));
dimid = NC_hashmapGet(ncap->hashmap, shash);
dimid = (int)NC_hashmapGetDim(ncap, name);
free(name);
if (dimid >= 0) {
if (dimpp != NULL)
@ -293,9 +290,7 @@ incr_NC_dimarray(NC_dimarray *ncap, NC_dim *newelemp)
if(newelemp != NULL)
{
uint32_t key = hash_fast(newelemp->name->cp,
strlen(newelemp->name->cp));
NC_hashmapInsert(ncap->hashmap, ncap->nelems, key);
NC_hashmapAddDim(ncap, (long)ncap->nelems, newelemp->name->cp);
ncap->value[ncap->nelems] = newelemp;
ncap->nelems++;
}
@ -449,7 +444,6 @@ NC3_rename_dim( int ncid, int dimid, const char *unewname)
int existid;
NC_dim *dimp;
char *newname; /* normalized */
uint32_t old_hash_key, dim_hash;
status = NC_check_id(ncid, &nc);
if(status != NC_NOERR)
@ -472,10 +466,7 @@ NC3_rename_dim( int ncid, int dimid, const char *unewname)
return NC_EBADDIM;
NC_string *old = dimp->name;
old_hash_key = hash_fast(old->cp, strlen(old->cp));
newname = (char *)utf8proc_NFC((const unsigned char *)unewname);
dim_hash = hash_fast(newname, strlen(newname));
if(newname == NULL)
return NC_ENOMEM;
if(NC_indef(ncp))
@ -485,11 +476,11 @@ NC3_rename_dim( int ncid, int dimid, const char *unewname)
if(newStr == NULL)
return NC_ENOMEM;
dimp->name = newStr;
free_NC_string(old);
/* Remove old name from hashmap; add new... */
NC_hashmapRemove(ncp->dims.hashmap, old_hash_key);
NC_hashmapInsert(ncp->dims.hashmap, dimid, dim_hash);
NC_hashmapRemoveDim(&ncp->dims, old->cp);
NC_hashmapAddDim(&ncp->dims, dimid, newname);
free_NC_string(old);
return NC_NOERR;
}
@ -502,8 +493,8 @@ NC3_rename_dim( int ncid, int dimid, const char *unewname)
return status;
/* Remove old name from hashmap; add new... */
NC_hashmapRemove(ncp->dims.hashmap, old_hash_key);
NC_hashmapInsert(ncp->dims.hashmap, dimid, dim_hash);
NC_hashmapRemoveDim(&ncp->dims, old->cp);
NC_hashmapAddDim(&ncp->dims, dimid, newname);
set_NC_hdirty(ncp);

View File

@ -1,6 +1,9 @@
#include "nc_hashmap.h"
#include "nc3internal.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
/* this should be prime */
@ -8,30 +11,33 @@
#define ACTIVE 1
extern uint32_t hash_fast(const void *key, size_t length);
/* NOTE: 'data' is the dimid or varid which is non-negative.
we store the dimid+1 so a valid entry will have
data > 0
*/
typedef struct {
int data;
long data;
int flags;
long key;
unsigned long key;
} hEntry;
struct s_hashmap{
hEntry* table;
long size, count;
unsigned long size;
unsigned long count;
};
static unsigned long isPrime(unsigned long val)
static int isPrime(unsigned long val)
{
int i, p, exp, a;
int i;
for (i = 9; i--;)
{
a = (rand() % (val-4)) + 2;
p = 1;
exp = val-1;
unsigned long a = ((unsigned long)random() % (val-4)) + 2;
unsigned long p = 1;
unsigned long exp = val-1;
while (exp)
{
if (exp & 1)
@ -48,7 +54,7 @@ static unsigned long isPrime(unsigned long val)
return 1;
}
static int findPrimeGreaterThan(int val)
static unsigned long findPrimeGreaterThan(unsigned long val)
{
if (val & 1)
val+=2;
@ -61,10 +67,11 @@ static int findPrimeGreaterThan(int val)
return val;
}
static void rehash(NC_hashmap* hm)
static void rehashDim(const NC_dimarray* ncap)
{
long size = hm->size;
long count = hm->count;
NC_hashmap* hm = ncap->hashmap;
unsigned long size = hm->size;
unsigned long count = hm->count;
hEntry* table = hm->table;
@ -72,10 +79,12 @@ static void rehash(NC_hashmap* hm)
hm->table = (hEntry*)calloc(sizeof(hEntry), hm->size);
hm->count = 0;
while(--size >= 0) {
while(size > 0) {
--size;
if (table[size].flags == ACTIVE) {
NC_hashmapInsert(hm, table[size].data-1, table[size].key);
assert(NC_hashmapGet(hm, table[size].key) == table[size].data-1);
NC_dim *elem = ncap->value[table[size].data-1];
NC_hashmapAddDim(ncap, table[size].data-1, elem->name->cp);
assert(NC_hashmapGetDim(ncap, elem->name->cp) == table[size].data-1);
}
}
@ -83,7 +92,32 @@ static void rehash(NC_hashmap* hm)
assert(count == hm->count);
}
NC_hashmap* NC_hashmapCreate(int startsize)
static void rehashVar(const NC_vararray* ncap)
{
NC_hashmap* hm = ncap->hashmap;
unsigned long size = hm->size;
unsigned long count = hm->count;
hEntry* table = hm->table;
hm->size = findPrimeGreaterThan(size<<1);
hm->table = (hEntry*)calloc(sizeof(hEntry), (size_t)hm->size);
hm->count = 0;
while(size > 0) {
--size;
if (table[size].flags == ACTIVE) {
NC_var *elem = ncap->value[table[size].data-1];
NC_hashmapAddVar(ncap, table[size].data-1, elem->name->cp);
assert(NC_hashmapGetVar(ncap, elem->name->cp) == table[size].data-1);
}
}
free(table);
assert(count == hm->count);
}
NC_hashmap* NC_hashmapCreate(unsigned long startsize)
{
NC_hashmap* hm = (NC_hashmap*)malloc(sizeof(NC_hashmap));
@ -95,31 +129,83 @@ NC_hashmap* NC_hashmapCreate(int startsize)
startsize = findPrimeGreaterThan(startsize-2);
}
hm->table = (hEntry*)calloc(sizeof(hEntry), startsize);
hm->table = (hEntry*)calloc(sizeof(hEntry), (size_t)startsize);
hm->size = startsize;
hm->count = 0;
return hm;
}
void NC_hashmapInsert(NC_hashmap* hash, int data, unsigned long key)
void NC_hashmapAddDim(const NC_dimarray* ncap, long data, const char *name)
{
long index, i, step;
unsigned long key = hash_fast(name, strlen(name));
NC_hashmap* hash = ncap->hashmap;
if (hash->size*3/4 <= hash->count) {
rehash(hash);
rehashDim(ncap);
}
do
{
index = key % hash->size;
step = (key % (hash->size-2)) + 1;
unsigned long i;
unsigned long index = key % hash->size;
unsigned long step = (key % (hash->size-2)) + 1;
for (i = 0; i < hash->size; i++)
{
if (hash->table[index].flags & ACTIVE)
{
if (hash->table[index].key == key)
hEntry entry = hash->table[index];
if (entry.key == key &&
strncmp(name, ncap->value[entry.data-1]->name->cp,
ncap->value[entry.data-1]->name->nchars) == 0)
{
hash->table[index].data = data+1;
return;
}
}
else
{
hash->table[index].flags |= ACTIVE;
hash->table[index].data = data+1;
hash->table[index].key = key;
++hash->count;
return;
}
index = (index + step) % hash->size;
}
/* it should not be possible that we EVER come this far, but unfortunately
not every generated prime number is prime (Carmichael numbers...) */
rehashDim(ncap);
}
while (1);
}
void NC_hashmapAddVar(const NC_vararray* ncap, long data, const char *name)
{
unsigned long key = hash_fast(name, strlen(name));
NC_hashmap* hash = ncap->hashmap;
if (hash->size*3/4 <= hash->count) {
rehashVar(ncap);
}
do
{
unsigned long i;
unsigned long index = key % hash->size;
unsigned long step = (key % (hash->size-2)) + 1;
for (i = 0; i < hash->size; i++)
{
if (hash->table[index].flags & ACTIVE)
{
hEntry entry = hash->table[index];
if (entry.key == key &&
strncmp(name, ncap->value[entry.data-1]->name->cp,
ncap->value[entry.data-1]->name->nchars) == 0)
{
hash->table[index].data = data+1;
return;
@ -139,23 +225,28 @@ void NC_hashmapInsert(NC_hashmap* hash, int data, unsigned long key)
/* it should not be possible that we EVER come this far, but unfortunately
not every generated prime number is prime (Carmichael numbers...) */
rehash(hash);
rehashVar(ncap);
}
while (1);
}
int NC_hashmapRemove(NC_hashmap* hash, unsigned long key)
long NC_hashmapRemoveDim(const NC_dimarray* ncap, const char *name)
{
long index, i, step;
unsigned long i;
unsigned long key = hash_fast(name, strlen(name));
NC_hashmap* hash = ncap->hashmap;
index = key % hash->size;
step = (key % (hash->size-2)) + 1;
unsigned long index = key % hash->size;
unsigned long step = (key % (hash->size-2)) + 1;
for (i = 0; i < hash->size; i++)
{
if (hash->table[index].data > 0)
{
if (hash->table[index].key == key)
hEntry entry = hash->table[index];
if (entry.key == key &&
strncmp(name, ncap->value[entry.data-1]->name->cp,
ncap->value[entry.data-1]->name->nchars) == 0)
{
if (hash->table[index].flags & ACTIVE)
{
@ -176,24 +267,68 @@ int NC_hashmapRemove(NC_hashmap* hash, unsigned long key)
return -1;
}
int NC_hashmapGet(NC_hashmap* hash, unsigned long key)
long NC_hashmapRemoveVar(const NC_vararray* ncap, const char *name)
{
unsigned long i;
unsigned long key = hash_fast(name, strlen(name));
NC_hashmap* hash = ncap->hashmap;
unsigned long index = key % hash->size;
unsigned long step = (key % (hash->size-2)) + 1;
for (i = 0; i < hash->size; i++)
{
if (hash->table[index].data > 0)
{
hEntry entry = hash->table[index];
if (entry.key == key &&
strncmp(name, ncap->value[entry.data-1]->name->cp,
ncap->value[entry.data-1]->name->nchars) == 0)
{
if (hash->table[index].flags & ACTIVE)
{
hash->table[index].flags &= ~ACTIVE;
--hash->count;
return hash->table[index].data-1;
}
else /* in, but not active (i.e. deleted) */
return -1;
}
}
else /* found an empty place (can't be in) */
return -1;
index = (index + step) % hash->size;
}
/* everything searched through, but not in */
return -1;
}
long NC_hashmapGetDim(const NC_dimarray* ncap, const char *name)
{
NC_hashmap* hash = ncap->hashmap;
if (hash->count)
{
long index, i, step;
index = key % hash->size;
step = (key % (hash->size-2)) + 1;
unsigned long key = hash_fast(name, strlen(name));
NC_hashmap* hash = ncap->hashmap;
unsigned long i;
unsigned long index = key % hash->size;
unsigned long step = (key % (hash->size-2)) + 1;
for (i = 0; i < hash->size; i++)
{
if (hash->table[index].key == key)
hEntry entry = hash->table[index];
if (entry.key == key &&
strncmp(name, ncap->value[entry.data-1]->name->cp,
ncap->value[entry.data-1]->name->nchars) == 0)
{
if (hash->table[index].flags & ACTIVE)
return hash->table[index].data-1;
if (entry.flags & ACTIVE)
return entry.data-1;
break;
}
else
if (!(hash->table[index].flags & ACTIVE))
if (!(entry.flags & ACTIVE))
break;
index = (index + step) % hash->size;
@ -203,7 +338,41 @@ int NC_hashmapGet(NC_hashmap* hash, unsigned long key)
return -1;
}
long NC_hashmapCount(NC_hashmap* hash)
long NC_hashmapGetVar(const NC_vararray* ncap, const char *name)
{
NC_hashmap* hash = ncap->hashmap;
if (hash->count)
{
unsigned long key = hash_fast(name, strlen(name));
NC_hashmap* hash = ncap->hashmap;
unsigned long i;
unsigned long index = key % hash->size;
unsigned long step = (key % (hash->size-2)) + 1;
for (i = 0; i < hash->size; i++)
{
hEntry entry = hash->table[index];
if (entry.key == key &&
strncmp(name, ncap->value[entry.data-1]->name->cp,
ncap->value[entry.data-1]->name->nchars) == 0)
{
if (entry.flags & ACTIVE)
return entry.data-1;
break;
}
else
if (!(entry.flags & ACTIVE))
break;
index = (index + step) % hash->size;
}
}
return -1;
}
unsigned long NC_hashmapCount(NC_hashmap* hash)
{
return hash->count;
}

View File

@ -576,10 +576,8 @@ v1h_get_NC_dimarray(v1hs *gsp, NC_dimarray *ncap)
return status;
}
{
uint32_t key = hash_fast((*dpp)->name->cp,
(*dpp)->name->nchars);
int dimid = (size_t)(dpp - ncap->value);
NC_hashmapInsert(ncap->hashmap, dimid, key);
NC_hashmapAddDim(ncap, dimid, (*dpp)->name->cp);
}
}
}
@ -1172,10 +1170,8 @@ v1h_get_NC_vararray(v1hs *gsp, NC_vararray *ncap)
return status;
}
{
uint32_t key = hash_fast((*vpp)->name->cp,
(*vpp)->name->nchars);
int varid = (size_t)(vpp - ncap->value);
NC_hashmapInsert(ncap->hashmap, varid, key);
NC_hashmapAddVar(ncap, varid, (*vpp)->name->cp);
}
}
}

View File

@ -299,9 +299,7 @@ incr_NC_vararray(NC_vararray *ncap, NC_var *newelemp)
if(newelemp != NULL)
{
uint32_t key = hash_fast(newelemp->name->cp,
strlen(newelemp->name->cp));
NC_hashmapInsert(ncap->hashmap, ncap->nelems, key);
NC_hashmapAddVar(ncap, (long)ncap->nelems, newelemp->name->cp);
ncap->value[ncap->nelems] = newelemp;
ncap->nelems++;
}
@ -337,7 +335,6 @@ int
NC_findvar(const NC_vararray *ncap, const char *uname, NC_var **varpp)
{
int hash_var_id;
uint32_t shash;
char *name;
assert(ncap != NULL);
@ -350,9 +347,8 @@ NC_findvar(const NC_vararray *ncap, const char *uname, NC_var **varpp)
name = (char *)utf8proc_NFC((const unsigned char *)uname);
if(name == NULL)
return NC_ENOMEM;
shash = hash_fast(name, strlen(name));
hash_var_id = NC_hashmapGet(ncap->hashmap, shash);
hash_var_id = (int)NC_hashmapGetVar(ncap, name);
free(name);
if (hash_var_id >= 0) {
if (varpp != NULL)
@ -708,7 +704,6 @@ NC3_rename_var(int ncid, int varid, const char *unewname)
NC_string *old, *newStr;
int other;
char *newname; /* normalized */
uint32_t old_hash_key, var_hash;
status = NC_check_id(ncid, &nc);
if(status != NC_NOERR)
@ -740,11 +735,9 @@ NC3_rename_var(int ncid, int varid, const char *unewname)
old = varp->name;
old_hash_key = hash_fast(old->cp, strlen(old->cp));
newname = (char *)utf8proc_NFC((const unsigned char *)unewname);
if(newname == NULL)
return NC_ENOMEM;
var_hash = hash_fast(newname, strlen(newname));
if(NC_indef(ncp))
{
newStr = new_NC_string(strlen(newname),newname);
@ -752,11 +745,11 @@ NC3_rename_var(int ncid, int varid, const char *unewname)
if(newStr == NULL)
return(-1);
varp->name = newStr;
free_NC_string(old);
/* Remove old name from hashmap; add new... */
NC_hashmapRemove(ncp->vars.hashmap, old_hash_key);
NC_hashmapInsert(ncp->vars.hashmap, varid, var_hash);
NC_hashmapRemoveVar(&ncp->vars, old->cp);
NC_hashmapAddVar(&ncp->vars, varid, newname);
free_NC_string(old);
return NC_NOERR;
}
@ -768,8 +761,8 @@ NC3_rename_var(int ncid, int varid, const char *unewname)
return status;
/* Remove old name from hashmap; add new... */
NC_hashmapRemove(ncp->vars.hashmap, old_hash_key);
NC_hashmapInsert(ncp->vars.hashmap, varid, var_hash);
NC_hashmapRemoveVar(&ncp->vars, old->cp);
NC_hashmapAddVar(&ncp->vars, varid, newname);
set_NC_hdirty(ncp);