Merge branch 'nc3-hashmap-fix' of https://github.com/gsjaardema/netcdf-c into merge-gh246

This commit is contained in:
Ward Fisher 2016-04-05 15:31:06 -06:00
commit cc803f5c00
6 changed files with 243 additions and 84 deletions

View File

@ -187,9 +187,6 @@ typedef struct NC_vararray {
/* Begin defined in lookup3.c */
extern uint32_t
hash_fast(const void *key, size_t length);
/* End defined in lookup3.c */
/* Begin defined in var.c */

View File

@ -1,24 +1,37 @@
#ifndef HASHMAP_H_INCLUDED
#define HASHMAP_H_INCLUDED
#include <stdlib.h>
typedef struct NC_vararray NC_vararray;
typedef struct NC_dimarray NC_dimarray;
/** Hashmap structure (forward declaration) */
struct s_hashmap;
typedef struct s_hashmap NC_hashmap;
/** Creates a new hashmap near the given size. */
extern NC_hashmap* NC_hashmapCreate(int startsize);
extern NC_hashmap* NC_hashmapCreate(unsigned long startsize);
/** Inserts a new element into the hashmap. */
extern void NC_hashmapInsert(NC_hashmap*, int data, unsigned long key);
extern void NC_hashmapAddDim(const NC_dimarray*, long data, const char *name);
/** Removes the storage for the element of the key and returns the element. */
extern int NC_hashmapRemove(NC_hashmap*, unsigned long key);
extern long NC_hashmapRemoveDim(const NC_dimarray*, const char *name);
/** Returns the element for the key. */
extern int NC_hashmapGet(NC_hashmap*, unsigned long key);
extern long NC_hashmapGetDim(const NC_dimarray*, const char *name);
/** Inserts a new element into the hashmap. */
extern void NC_hashmapAddVar(const NC_vararray*, long data, const char *name);
/** Removes the storage for the element of the key and returns the element. */
extern long NC_hashmapRemoveVar(const NC_vararray*, const char *name);
/** Returns the element for the key. */
extern long NC_hashmapGetVar(const NC_vararray*, const char *name);
/** Returns the number of saved elements. */
extern long NC_hashmapCount(NC_hashmap*);
extern unsigned long NC_hashmapCount(NC_hashmap*);
/** Removes the hashmap structure. */
extern void NC_hashmapDelete(NC_hashmap*);

View File

@ -126,7 +126,6 @@ NC_finddim(const NC_dimarray *ncap, const char *uname, NC_dim **dimpp)
{
int dimid;
uint32_t shash;
NC_dim ** loc;
char *name;
@ -143,9 +142,7 @@ NC_finddim(const NC_dimarray *ncap, const char *uname, NC_dim **dimpp)
name = (char *)utf8proc_NFC((const unsigned char *)uname);
if(name == NULL)
return NC_ENOMEM;
shash = hash_fast(name, strlen(name));
shash = hash_fast(name, strlen(name));
dimid = NC_hashmapGet(ncap->hashmap, shash);
dimid = (int)NC_hashmapGetDim(ncap, name);
free(name);
if (dimid >= 0) {
if (dimpp != NULL)
@ -293,9 +290,7 @@ incr_NC_dimarray(NC_dimarray *ncap, NC_dim *newelemp)
if(newelemp != NULL)
{
uint32_t key = hash_fast(newelemp->name->cp,
strlen(newelemp->name->cp));
NC_hashmapInsert(ncap->hashmap, ncap->nelems, key);
NC_hashmapAddDim(ncap, (long)ncap->nelems, newelemp->name->cp);
ncap->value[ncap->nelems] = newelemp;
ncap->nelems++;
}
@ -449,7 +444,6 @@ NC3_rename_dim( int ncid, int dimid, const char *unewname)
int existid;
NC_dim *dimp;
char *newname; /* normalized */
uint32_t old_hash_key, dim_hash;
status = NC_check_id(ncid, &nc);
if(status != NC_NOERR)
@ -472,10 +466,7 @@ NC3_rename_dim( int ncid, int dimid, const char *unewname)
return NC_EBADDIM;
NC_string *old = dimp->name;
old_hash_key = hash_fast(old->cp, strlen(old->cp));
newname = (char *)utf8proc_NFC((const unsigned char *)unewname);
dim_hash = hash_fast(newname, strlen(newname));
if(newname == NULL)
return NC_ENOMEM;
if(NC_indef(ncp))
@ -485,11 +476,11 @@ NC3_rename_dim( int ncid, int dimid, const char *unewname)
if(newStr == NULL)
return NC_ENOMEM;
dimp->name = newStr;
free_NC_string(old);
/* Remove old name from hashmap; add new... */
NC_hashmapRemove(ncp->dims.hashmap, old_hash_key);
NC_hashmapInsert(ncp->dims.hashmap, dimid, dim_hash);
NC_hashmapRemoveDim(&ncp->dims, old->cp);
NC_hashmapAddDim(&ncp->dims, dimid, newStr->cp);
free_NC_string(old);
return NC_NOERR;
}
@ -502,8 +493,8 @@ NC3_rename_dim( int ncid, int dimid, const char *unewname)
return status;
/* Remove old name from hashmap; add new... */
NC_hashmapRemove(ncp->dims.hashmap, old_hash_key);
NC_hashmapInsert(ncp->dims.hashmap, dimid, dim_hash);
NC_hashmapRemoveDim(&ncp->dims, old->cp);
NC_hashmapAddDim(&ncp->dims, dimid, dimp->name->cp);
set_NC_hdirty(ncp);

View File

@ -1,6 +1,9 @@
#include "nc_hashmap.h"
#include "nc3internal.h"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
/* this should be prime */
@ -8,30 +11,33 @@
#define ACTIVE 1
extern uint32_t hash_fast(const void *key, size_t length);
/* NOTE: 'data' is the dimid or varid which is non-negative.
we store the dimid+1 so a valid entry will have
data > 0
*/
typedef struct {
int data;
long data;
int flags;
long key;
unsigned long key;
} hEntry;
struct s_hashmap{
hEntry* table;
long size, count;
unsigned long size;
unsigned long count;
};
static unsigned long isPrime(unsigned long val)
static int isPrime(unsigned long val)
{
int i, p, exp, a;
int i;
for (i = 9; i--;)
{
a = (rand() % (val-4)) + 2;
p = 1;
exp = val-1;
unsigned long a = ((unsigned long)random() % (val-4)) + 2;
unsigned long p = 1;
unsigned long exp = val-1;
while (exp)
{
if (exp & 1)
@ -48,7 +54,7 @@ static unsigned long isPrime(unsigned long val)
return 1;
}
static int findPrimeGreaterThan(int val)
static unsigned long findPrimeGreaterThan(unsigned long val)
{
if (val & 1)
val+=2;
@ -61,10 +67,11 @@ static int findPrimeGreaterThan(int val)
return val;
}
static void rehash(NC_hashmap* hm)
static void rehashDim(const NC_dimarray* ncap)
{
long size = hm->size;
long count = hm->count;
NC_hashmap* hm = ncap->hashmap;
unsigned long size = hm->size;
unsigned long count = hm->count;
hEntry* table = hm->table;
@ -72,10 +79,12 @@ static void rehash(NC_hashmap* hm)
hm->table = (hEntry*)calloc(sizeof(hEntry), hm->size);
hm->count = 0;
while(--size >= 0) {
while(size > 0) {
--size;
if (table[size].flags == ACTIVE) {
NC_hashmapInsert(hm, table[size].data-1, table[size].key);
assert(NC_hashmapGet(hm, table[size].key) == table[size].data-1);
NC_dim *elem = ncap->value[table[size].data-1];
NC_hashmapAddDim(ncap, table[size].data-1, elem->name->cp);
assert(NC_hashmapGetDim(ncap, elem->name->cp) == table[size].data-1);
}
}
@ -83,7 +92,32 @@ static void rehash(NC_hashmap* hm)
assert(count == hm->count);
}
NC_hashmap* NC_hashmapCreate(int startsize)
static void rehashVar(const NC_vararray* ncap)
{
NC_hashmap* hm = ncap->hashmap;
unsigned long size = hm->size;
unsigned long count = hm->count;
hEntry* table = hm->table;
hm->size = findPrimeGreaterThan(size<<1);
hm->table = (hEntry*)calloc(sizeof(hEntry), (size_t)hm->size);
hm->count = 0;
while(size > 0) {
--size;
if (table[size].flags == ACTIVE) {
NC_var *elem = ncap->value[table[size].data-1];
NC_hashmapAddVar(ncap, table[size].data-1, elem->name->cp);
assert(NC_hashmapGetVar(ncap, elem->name->cp) == table[size].data-1);
}
}
free(table);
assert(count == hm->count);
}
NC_hashmap* NC_hashmapCreate(unsigned long startsize)
{
NC_hashmap* hm = (NC_hashmap*)malloc(sizeof(NC_hashmap));
@ -95,31 +129,83 @@ NC_hashmap* NC_hashmapCreate(int startsize)
startsize = findPrimeGreaterThan(startsize-2);
}
hm->table = (hEntry*)calloc(sizeof(hEntry), startsize);
hm->table = (hEntry*)calloc(sizeof(hEntry), (size_t)startsize);
hm->size = startsize;
hm->count = 0;
return hm;
}
void NC_hashmapInsert(NC_hashmap* hash, int data, unsigned long key)
void NC_hashmapAddDim(const NC_dimarray* ncap, long data, const char *name)
{
long index, i, step;
unsigned long key = hash_fast(name, strlen(name));
NC_hashmap* hash = ncap->hashmap;
if (hash->size*3/4 <= hash->count) {
rehash(hash);
rehashDim(ncap);
}
do
{
index = key % hash->size;
step = (key % (hash->size-2)) + 1;
unsigned long i;
unsigned long index = key % hash->size;
unsigned long step = (key % (hash->size-2)) + 1;
for (i = 0; i < hash->size; i++)
{
if (hash->table[index].flags & ACTIVE)
{
if (hash->table[index].key == key)
hEntry entry = hash->table[index];
if (entry.key == key &&
strncmp(name, ncap->value[entry.data-1]->name->cp,
ncap->value[entry.data-1]->name->nchars) == 0)
{
hash->table[index].data = data+1;
return;
}
}
else
{
hash->table[index].flags |= ACTIVE;
hash->table[index].data = data+1;
hash->table[index].key = key;
++hash->count;
return;
}
index = (index + step) % hash->size;
}
/* it should not be possible that we EVER come this far, but unfortunately
not every generated prime number is prime (Carmichael numbers...) */
rehashDim(ncap);
}
while (1);
}
void NC_hashmapAddVar(const NC_vararray* ncap, long data, const char *name)
{
unsigned long key = hash_fast(name, strlen(name));
NC_hashmap* hash = ncap->hashmap;
if (hash->size*3/4 <= hash->count) {
rehashVar(ncap);
}
do
{
unsigned long i;
unsigned long index = key % hash->size;
unsigned long step = (key % (hash->size-2)) + 1;
for (i = 0; i < hash->size; i++)
{
if (hash->table[index].flags & ACTIVE)
{
hEntry entry = hash->table[index];
if (entry.key == key &&
strncmp(name, ncap->value[entry.data-1]->name->cp,
ncap->value[entry.data-1]->name->nchars) == 0)
{
hash->table[index].data = data+1;
return;
@ -139,23 +225,28 @@ void NC_hashmapInsert(NC_hashmap* hash, int data, unsigned long key)
/* it should not be possible that we EVER come this far, but unfortunately
not every generated prime number is prime (Carmichael numbers...) */
rehash(hash);
rehashVar(ncap);
}
while (1);
}
int NC_hashmapRemove(NC_hashmap* hash, unsigned long key)
long NC_hashmapRemoveDim(const NC_dimarray* ncap, const char *name)
{
long index, i, step;
unsigned long i;
unsigned long key = hash_fast(name, strlen(name));
NC_hashmap* hash = ncap->hashmap;
index = key % hash->size;
step = (key % (hash->size-2)) + 1;
unsigned long index = key % hash->size;
unsigned long step = (key % (hash->size-2)) + 1;
for (i = 0; i < hash->size; i++)
{
if (hash->table[index].data > 0)
{
if (hash->table[index].key == key)
hEntry entry = hash->table[index];
if (entry.key == key &&
strncmp(name, ncap->value[entry.data-1]->name->cp,
ncap->value[entry.data-1]->name->nchars) == 0)
{
if (hash->table[index].flags & ACTIVE)
{
@ -176,24 +267,68 @@ int NC_hashmapRemove(NC_hashmap* hash, unsigned long key)
return -1;
}
int NC_hashmapGet(NC_hashmap* hash, unsigned long key)
long NC_hashmapRemoveVar(const NC_vararray* ncap, const char *name)
{
unsigned long i;
unsigned long key = hash_fast(name, strlen(name));
NC_hashmap* hash = ncap->hashmap;
unsigned long index = key % hash->size;
unsigned long step = (key % (hash->size-2)) + 1;
for (i = 0; i < hash->size; i++)
{
if (hash->table[index].data > 0)
{
hEntry entry = hash->table[index];
if (entry.key == key &&
strncmp(name, ncap->value[entry.data-1]->name->cp,
ncap->value[entry.data-1]->name->nchars) == 0)
{
if (hash->table[index].flags & ACTIVE)
{
hash->table[index].flags &= ~ACTIVE;
--hash->count;
return hash->table[index].data-1;
}
else /* in, but not active (i.e. deleted) */
return -1;
}
}
else /* found an empty place (can't be in) */
return -1;
index = (index + step) % hash->size;
}
/* everything searched through, but not in */
return -1;
}
long NC_hashmapGetDim(const NC_dimarray* ncap, const char *name)
{
NC_hashmap* hash = ncap->hashmap;
if (hash->count)
{
long index, i, step;
index = key % hash->size;
step = (key % (hash->size-2)) + 1;
unsigned long key = hash_fast(name, strlen(name));
NC_hashmap* hash = ncap->hashmap;
unsigned long i;
unsigned long index = key % hash->size;
unsigned long step = (key % (hash->size-2)) + 1;
for (i = 0; i < hash->size; i++)
{
if (hash->table[index].key == key)
hEntry entry = hash->table[index];
if (entry.key == key &&
strncmp(name, ncap->value[entry.data-1]->name->cp,
ncap->value[entry.data-1]->name->nchars) == 0)
{
if (hash->table[index].flags & ACTIVE)
return hash->table[index].data-1;
if (entry.flags & ACTIVE)
return entry.data-1;
break;
}
else
if (!(hash->table[index].flags & ACTIVE))
if (!(entry.flags & ACTIVE))
break;
index = (index + step) % hash->size;
@ -203,7 +338,41 @@ int NC_hashmapGet(NC_hashmap* hash, unsigned long key)
return -1;
}
long NC_hashmapCount(NC_hashmap* hash)
long NC_hashmapGetVar(const NC_vararray* ncap, const char *name)
{
NC_hashmap* hash = ncap->hashmap;
if (hash->count)
{
unsigned long key = hash_fast(name, strlen(name));
NC_hashmap* hash = ncap->hashmap;
unsigned long i;
unsigned long index = key % hash->size;
unsigned long step = (key % (hash->size-2)) + 1;
for (i = 0; i < hash->size; i++)
{
hEntry entry = hash->table[index];
if (entry.key == key &&
strncmp(name, ncap->value[entry.data-1]->name->cp,
ncap->value[entry.data-1]->name->nchars) == 0)
{
if (entry.flags & ACTIVE)
return entry.data-1;
break;
}
else
if (!(entry.flags & ACTIVE))
break;
index = (index + step) % hash->size;
}
}
return -1;
}
unsigned long NC_hashmapCount(NC_hashmap* hash)
{
return hash->count;
}

View File

@ -576,10 +576,8 @@ v1h_get_NC_dimarray(v1hs *gsp, NC_dimarray *ncap)
return status;
}
{
uint32_t key = hash_fast((*dpp)->name->cp,
(*dpp)->name->nchars);
int dimid = (size_t)(dpp - ncap->value);
NC_hashmapInsert(ncap->hashmap, dimid, key);
NC_hashmapAddDim(ncap, dimid, (*dpp)->name->cp);
}
}
}
@ -1172,10 +1170,8 @@ v1h_get_NC_vararray(v1hs *gsp, NC_vararray *ncap)
return status;
}
{
uint32_t key = hash_fast((*vpp)->name->cp,
(*vpp)->name->nchars);
int varid = (size_t)(vpp - ncap->value);
NC_hashmapInsert(ncap->hashmap, varid, key);
NC_hashmapAddVar(ncap, varid, (*vpp)->name->cp);
}
}
}

View File

@ -299,9 +299,7 @@ incr_NC_vararray(NC_vararray *ncap, NC_var *newelemp)
if(newelemp != NULL)
{
uint32_t key = hash_fast(newelemp->name->cp,
strlen(newelemp->name->cp));
NC_hashmapInsert(ncap->hashmap, ncap->nelems, key);
NC_hashmapAddVar(ncap, (long)ncap->nelems, newelemp->name->cp);
ncap->value[ncap->nelems] = newelemp;
ncap->nelems++;
}
@ -337,7 +335,6 @@ int
NC_findvar(const NC_vararray *ncap, const char *uname, NC_var **varpp)
{
int hash_var_id;
uint32_t shash;
char *name;
assert(ncap != NULL);
@ -350,9 +347,8 @@ NC_findvar(const NC_vararray *ncap, const char *uname, NC_var **varpp)
name = (char *)utf8proc_NFC((const unsigned char *)uname);
if(name == NULL)
return NC_ENOMEM;
shash = hash_fast(name, strlen(name));
hash_var_id = NC_hashmapGet(ncap->hashmap, shash);
hash_var_id = (int)NC_hashmapGetVar(ncap, name);
free(name);
if (hash_var_id >= 0) {
if (varpp != NULL)
@ -708,7 +704,6 @@ NC3_rename_var(int ncid, int varid, const char *unewname)
NC_string *old, *newStr;
int other;
char *newname; /* normalized */
uint32_t old_hash_key, var_hash;
status = NC_check_id(ncid, &nc);
if(status != NC_NOERR)
@ -735,16 +730,14 @@ NC3_rename_var(int ncid, int varid, const char *unewname)
if(status != NC_NOERR)
{
/* invalid varid */
return status;
return status;
}
old = varp->name;
old_hash_key = hash_fast(old->cp, strlen(old->cp));
newname = (char *)utf8proc_NFC((const unsigned char *)unewname);
if(newname == NULL)
return NC_ENOMEM;
var_hash = hash_fast(newname, strlen(newname));
if(NC_indef(ncp))
{
newStr = new_NC_string(strlen(newname),newname);
@ -752,11 +745,11 @@ NC3_rename_var(int ncid, int varid, const char *unewname)
if(newStr == NULL)
return(-1);
varp->name = newStr;
free_NC_string(old);
/* Remove old name from hashmap; add new... */
NC_hashmapRemove(ncp->vars.hashmap, old_hash_key);
NC_hashmapInsert(ncp->vars.hashmap, varid, var_hash);
NC_hashmapRemoveVar(&ncp->vars, old->cp);
NC_hashmapAddVar(&ncp->vars, varid, newStr->cp);
free_NC_string(old);
return NC_NOERR;
}
@ -768,8 +761,8 @@ NC3_rename_var(int ncid, int varid, const char *unewname)
return status;
/* Remove old name from hashmap; add new... */
NC_hashmapRemove(ncp->vars.hashmap, old_hash_key);
NC_hashmapInsert(ncp->vars.hashmap, varid, var_hash);
NC_hashmapRemoveVar(&ncp->vars, old->cp);
NC_hashmapAddVar(&ncp->vars, varid, varp->name->cp);
set_NC_hdirty(ncp);