mirror of
https://github.com/Unidata/netcdf-c.git
synced 2024-11-27 07:30:33 +08:00
Add hash field to dim and var to facilitate fast name compare
In non-classic netcdf-4 models, a file may contain large numbers of dims and vars. Many operations search the entire list of dims or vars for a dim/var matching a specific name, which results in *lots* of strncmp or strcmp calls. If we add a hash field to the var and dim structs, similar to what has already been done for the netcdf-3 formats, then we can hash the name being searched for and numerically compare that value with each var/dim hash value. Only if the hashes match do we make the more expensive strncmp/strcmp call to ensure that the names truly match.
This commit is contained in:
parent
ba06e979c6
commit
1a84a6a99e
@ -113,6 +113,7 @@ typedef struct NC_DIM_INFO
|
||||
NC_LIST_NODE_T l; /* Use generic doubly-linked list (must be first) */
|
||||
char *name;
|
||||
size_t len;
|
||||
uint32_t hash;
|
||||
int dimid;
|
||||
nc_bool_t unlimited; /* True if the dimension is unlimited */
|
||||
nc_bool_t extended; /* True if the dimension needs to be extended */
|
||||
@ -148,6 +149,7 @@ typedef struct NC_VAR_INFO
|
||||
NC_DIM_INFO_T **dim;
|
||||
int varid;
|
||||
int natts;
|
||||
uint32_t hash;
|
||||
nc_bool_t is_new_var; /* True if variable is newly created */
|
||||
nc_bool_t was_coord_var; /* True if variable was a coordinate var, but either the dim or var has been renamed */
|
||||
nc_bool_t became_coord_var; /* True if variable _became_ a coordinate var, because either the dim or var has been renamed */
|
||||
|
@ -73,6 +73,7 @@ NC4_def_dim(int ncid, const char *name, size_t len, int *idp)
|
||||
NC_DIM_INFO_T *dim;
|
||||
char norm_name[NC_MAX_NAME + 1];
|
||||
int retval = NC_NOERR;
|
||||
uint32_t nn_hash;
|
||||
|
||||
LOG((2, "%s: ncid 0x%x name %s len %d", __func__, ncid, name,
|
||||
(int)len));
|
||||
@ -122,9 +123,11 @@ NC4_def_dim(int ncid, const char *name, size_t len, int *idp)
|
||||
if(len > X_UINT_MAX) /* Backward compat */
|
||||
return NC_EDIMSIZE;
|
||||
|
||||
nn_hash = hash_fast(norm_name, strlen(norm_name));
|
||||
|
||||
/* Make sure the name is not already in use. */
|
||||
for (dim = grp->dim; dim; dim = dim->l.next)
|
||||
if (!strncmp(dim->name, norm_name, NC_MAX_NAME))
|
||||
if (nn_hash == dim->hash && !strncmp(dim->name, norm_name, NC_MAX_NAME))
|
||||
return NC_ENAMEINUSE;
|
||||
|
||||
/* Add a dimension to the list. The ID must come from the file
|
||||
@ -139,6 +142,8 @@ NC4_def_dim(int ncid, const char *name, size_t len, int *idp)
|
||||
if (len == NC_UNLIMITED)
|
||||
dim->unlimited = NC_TRUE;
|
||||
|
||||
dim->hash = nn_hash;
|
||||
|
||||
/* Pass back the dimid. */
|
||||
if (idp)
|
||||
*idp = dim->dimid;
|
||||
@ -157,7 +162,8 @@ NC4_inq_dimid(int ncid, const char *name, int *idp)
|
||||
char norm_name[NC_MAX_NAME + 1];
|
||||
int finished = 0;
|
||||
int retval;
|
||||
|
||||
uint32_t shash;
|
||||
|
||||
LOG((2, "%s: ncid 0x%x name %s", __func__, ncid, name));
|
||||
|
||||
/* Find metadata for this file. */
|
||||
@ -177,10 +183,12 @@ NC4_inq_dimid(int ncid, const char *name, int *idp)
|
||||
if ((retval = nc4_normalize_name(name, norm_name)))
|
||||
return retval;
|
||||
|
||||
shash = hash_fast(norm_name, strlen(norm_name));
|
||||
|
||||
/* Go through each dim and check for a name match. */
|
||||
for (g = grp; g && !finished; g = g->parent)
|
||||
for (dim = g->dim; dim; dim = dim->l.next)
|
||||
if (!strncmp(dim->name, norm_name, NC_MAX_NAME))
|
||||
if (dim->hash == shash && !strncmp(dim->name, norm_name, NC_MAX_NAME))
|
||||
{
|
||||
if (idp)
|
||||
*idp = dim->dimid;
|
||||
@ -336,6 +344,8 @@ NC4_rename_dim(int ncid, int dimid, const char *name)
|
||||
return NC_ENOMEM;
|
||||
strcpy(dim->name, norm_name);
|
||||
|
||||
dim->hash = hash_fast(norm_name, strlen(norm_name));
|
||||
|
||||
/* Check if dimension was a coordinate variable, but names are different now */
|
||||
if (dim->coord_var && strcmp(dim->name, dim->coord_var->name))
|
||||
{
|
||||
|
@ -606,6 +606,7 @@ read_scale(NC_GRP_INFO_T *grp, hid_t datasetid, const char *obj_name,
|
||||
new_dim->hdf5_objid.fileno[1] = statbuf->fileno[1];
|
||||
new_dim->hdf5_objid.objno[0] = statbuf->objno[0];
|
||||
new_dim->hdf5_objid.objno[1] = statbuf->objno[1];
|
||||
new_dim->hash = hash_fast(obj_name, strlen(obj_name));
|
||||
|
||||
/* If the dimscale has an unlimited dimension, then this dimension
|
||||
* is unlimited. */
|
||||
@ -1564,6 +1565,7 @@ read_var(NC_GRP_INFO_T *grp, hid_t datasetid, const char *obj_name,
|
||||
strcpy(var->name, obj_name);
|
||||
}
|
||||
|
||||
var->hash = hash_fast(var->name, strlen(var->name));
|
||||
/* Find out what filters are applied to this HDF5 dataset,
|
||||
* fletcher32, deflate, and/or shuffle. All other filters are
|
||||
* ignored. */
|
||||
@ -2672,6 +2674,7 @@ nc4_open_hdf4_file(const char *path, int mode, NC *nc)
|
||||
dim->len = dim_len;
|
||||
else
|
||||
dim->len = *dimsize;
|
||||
dim->hash = hash_fast(dim_name, strlen(dim_name));
|
||||
}
|
||||
|
||||
/* Tell the variable the id of this dimension. */
|
||||
|
@ -764,7 +764,8 @@ nc4_check_dup_name(NC_GRP_INFO_T *grp, char *name)
|
||||
NC_TYPE_INFO_T *type;
|
||||
NC_GRP_INFO_T *g;
|
||||
NC_VAR_INFO_T *var;
|
||||
|
||||
uint32_t hash;
|
||||
|
||||
/* Any types of this name? */
|
||||
for (type = grp->type; type; type = type->l.next)
|
||||
if (!strcmp(type->name, name))
|
||||
@ -776,8 +777,9 @@ nc4_check_dup_name(NC_GRP_INFO_T *grp, char *name)
|
||||
return NC_ENAMEINUSE;
|
||||
|
||||
/* Any variables of this name? */
|
||||
hash = hash_fast(name, strlen(name));
|
||||
for (var = grp->var; var; var = var->l.next)
|
||||
if (!strcmp(var->name, name))
|
||||
if (var->hash == hash && !strcmp(var->name, name))
|
||||
return NC_ENAMEINUSE;
|
||||
|
||||
return NC_NOERR;
|
||||
|
@ -425,6 +425,7 @@ nc_def_var_nc4(int ncid, const char *name, nc_type xtype,
|
||||
if (!(var->name = malloc((strlen(norm_name) + 1) * sizeof(char))))
|
||||
BAIL(NC_ENOMEM);
|
||||
strcpy(var->name, norm_name);
|
||||
var->hash = hash_fast(norm_name, strlen(norm_name));
|
||||
var->varid = grp->nvars++;
|
||||
var->ndims = ndims;
|
||||
var->is_new_var = NC_TRUE;
|
||||
@ -513,7 +514,7 @@ nc_def_var_nc4(int ncid, const char *name, nc_type xtype,
|
||||
BAIL(retval);
|
||||
|
||||
/* Check for dim index 0 having the same name, in the same group */
|
||||
if (d == 0 && dim_grp == grp && strcmp(dim->name, norm_name) == 0)
|
||||
if (d == 0 && dim_grp == grp && dim->hash == var->hash && strcmp(dim->name, norm_name) == 0)
|
||||
{
|
||||
var->dimscale = NC_TRUE;
|
||||
dim->coord_var = var;
|
||||
@ -571,7 +572,7 @@ nc_def_var_nc4(int ncid, const char *name, nc_type xtype,
|
||||
* because the dimension will cause a HDF5 dataset to be created,
|
||||
* and this var has the same name. */
|
||||
for (dim = grp->dim; dim; dim = dim->l.next)
|
||||
if (!strcmp(dim->name, norm_name) &&
|
||||
if (dim->hash == var->hash && !strcmp(dim->name, norm_name) &&
|
||||
(!var->ndims || dimidsp[0] != dim->dimid))
|
||||
{
|
||||
/* Set a different hdf5 name for this variable to avoid name
|
||||
@ -1152,6 +1153,8 @@ NC4_inq_varid(int ncid, const char *name, int *varidp)
|
||||
NC_VAR_INFO_T *var;
|
||||
char norm_name[NC_MAX_NAME + 1];
|
||||
int retval;
|
||||
uint32_t nn_hash;
|
||||
|
||||
#if 0 /*def USE_PNETCDF*/
|
||||
NC_HDF5_FILE_INFO_T *h5;
|
||||
#endif
|
||||
@ -1181,9 +1184,11 @@ NC4_inq_varid(int ncid, const char *name, int *varidp)
|
||||
if ((retval = nc4_normalize_name(name, norm_name)))
|
||||
return retval;
|
||||
|
||||
nn_hash = hash_fast(norm_name, strlen(norm_name));
|
||||
|
||||
/* Find var of this name. */
|
||||
for (var = grp->var; var; var = var->l.next)
|
||||
if (!(strcmp(var->name, norm_name)))
|
||||
if (nn_hash == var->hash && !(strcmp(var->name, norm_name)))
|
||||
{
|
||||
*varidp = var->varid;
|
||||
return NC_NOERR;
|
||||
@ -1203,6 +1208,7 @@ NC4_rename_var(int ncid, int varid, const char *name)
|
||||
NC_GRP_INFO_T *grp;
|
||||
NC_HDF5_FILE_INFO_T *h5;
|
||||
NC_VAR_INFO_T *var, *tmp_var;
|
||||
uint32_t nn_hash;
|
||||
int retval = NC_NOERR;
|
||||
|
||||
LOG((2, "%s: ncid 0x%x varid %d name %s",
|
||||
@ -1234,10 +1240,11 @@ NC4_rename_var(int ncid, int varid, const char *name)
|
||||
return retval;
|
||||
|
||||
/* Check if name is in use, and retain a pointer to the correct variable */
|
||||
nn_hash = hash_fast(name, strlen(name));
|
||||
tmp_var = NULL;
|
||||
for (var = grp->var; var; var = var->l.next)
|
||||
{
|
||||
if (!strncmp(var->name, name, NC_MAX_NAME))
|
||||
if (nn_hash == var->hash && !strncmp(var->name, name, NC_MAX_NAME))
|
||||
return NC_ENAMEINUSE;
|
||||
if (var->varid == varid)
|
||||
tmp_var = var;
|
||||
@ -1265,6 +1272,7 @@ NC4_rename_var(int ncid, int varid, const char *name)
|
||||
if (!(var->name = malloc((strlen(name) + 1) * sizeof(char))))
|
||||
return NC_ENOMEM;
|
||||
strcpy(var->name, name);
|
||||
var->hash = nn_hash;
|
||||
|
||||
/* Check if this was a coordinate variable previously, but names are different now */
|
||||
if (var->dimscale && strcmp(var->name, var->dim[0]->name))
|
||||
|
Loading…
Reference in New Issue
Block a user