From 4ccebf25b577f3c772558fd8d1aad86f64197cc4 Mon Sep 17 00:00:00 2001 From: Greg Sjaardema Date: Fri, 4 Mar 2016 09:14:15 -0700 Subject: [PATCH] Use dim field of var instead of finding dim from var->dimids. The var struct has a 'dim' field which was not being used Instead, the dimids field would always search for the dim with the matching dimid. For db with large numbers of dims, this could be a significant time sync. Modified code to always set var-dim[i] when var->dimids[i] was set (if the dim existed at that point). Then use the var->dim field instead of var->dimids and search whenever requested. All var->dim accesses are protected by asserts that verify non-null and that the var->dim[]->dimid == var->dimids[]. --- libsrc4/nc4file.c | 14 ++- libsrc4/nc4hdf.c | 302 ++++++++++++++++++++-------------------------- libsrc4/nc4var.c | 3 +- 3 files changed, 143 insertions(+), 176 deletions(-) diff --git a/libsrc4/nc4file.c b/libsrc4/nc4file.c index d52e39ea0..83c5f2d08 100644 --- a/libsrc4/nc4file.c +++ b/libsrc4/nc4file.c @@ -661,12 +661,13 @@ exit: /* This function reads the hacked in coordinates attribute I use for * multi-dimensional coordinates. */ static int -read_coord_dimids(NC_VAR_INFO_T *var) +read_coord_dimids(NC_GRP_INFO_T *grp, NC_VAR_INFO_T *var) { hid_t coord_att_typeid = -1, coord_attid = -1, spaceid = -1; hssize_t npoints; int ret = 0; - + int d; + /* There is a hidden attribute telling us the ids of the * dimensions that apply to this multi-dimensional coordinate * variable. Read it. */ @@ -687,6 +688,12 @@ read_coord_dimids(NC_VAR_INFO_T *var) if (!ret && H5Aread(coord_attid, coord_att_typeid, var->dimids) < 0) ret++; LOG((4, "dimscale %s is multidimensional and has coords", var->name)); + /* Update var->dim field based on the var->dimids */ + for (d = 0; d < var->ndims; d++) { + /* Ok if does not find a dim at this time, but if found set it */ + nc4_find_dim(grp, var->dimids[d], &var->dim[d], NULL); + } + /* Set my HDF5 IDs free! */ if (spaceid >= 0 && H5Sclose(spaceid) < 0) ret++; #ifdef EXTRA_TESTS @@ -1679,7 +1686,7 @@ read_var(NC_GRP_INFO_T *grp, hid_t datasetid, const char *obj_name, var->dimscale = NC_TRUE; if (var->ndims > 1) { - if ((retval = read_coord_dimids(var))) + if ((retval = read_coord_dimids(grp, var))) BAIL(retval); } else @@ -2676,6 +2683,7 @@ nc4_open_hdf4_file(const char *path, int mode, NC *nc) /* Tell the variable the id of this dimension. */ var->dimids[d] = dim->dimid; + var->dim[d] = dim; } /* Read the atts. */ diff --git a/libsrc4/nc4hdf.c b/libsrc4/nc4hdf.c index 2edfb4de6..cd619c1e6 100644 --- a/libsrc4/nc4hdf.c +++ b/libsrc4/nc4hdf.c @@ -595,21 +595,17 @@ nc4_put_vara(NC *nc, int ncid, int varid, const size_t *startp, * put data beyond their current length. */ for (d2 = 0; d2 < var->ndims; d2++) { - for (dim = grp->dim; dim; dim = dim->l.next) + dim = var->dim[d2]; + assert(dim && dim->dimid == var->dimids[d2]); + if (!dim->unlimited) { - if (dim->dimid == var->dimids[d2]) - { - if (!dim->unlimited) - { - if (start[d2] >= (hssize_t)fdims[d2]) - BAIL_QUIET(NC_EINVALCOORDS); - if (start[d2] + count[d2] > fdims[d2]) - BAIL_QUIET(NC_EEDGE); - } - } + if (start[d2] >= (hssize_t)fdims[d2]) + BAIL_QUIET(NC_EINVALCOORDS); + if (start[d2] + count[d2] > fdims[d2]) + BAIL_QUIET(NC_EEDGE); } } - + /* Now you would think that no one would be crazy enough to write a scalar dataspace with one of the array function calls, but you would be wrong. So let's check to see if the dataset is @@ -705,8 +701,8 @@ nc4_put_vara(NC *nc, int ncid, int varid, const size_t *startp, { for (d2 = 0; d2 < var->ndims; d2++) { - if ((retval = nc4_find_dim(grp, var->dimids[d2], &dim, NULL))) - BAIL(retval); + dim = var->dim[d2]; + assert(dim && dim->dimid == var->dimids[d2]); if (dim->unlimited) { if (start[d2] + count[d2] > fdims[d2]) @@ -921,53 +917,51 @@ nc4_get_vara(NC *nc, int ncid, int varid, const size_t *startp, /* Check dimension bounds. Remember that unlimited dimensions can * put data beyond their current length. */ - for (d2 = 0; d2 < var->ndims; d2++) - for (g = grp; g; g = g->parent) - for (dim = g->dim; dim; dim = dim->l.next) - if (dim->dimid == var->dimids[d2]) - { - if (dim->unlimited) - { - size_t ulen; + for (d2 = 0; d2 < var->ndims; d2++) { + dim = var->dim[d2]; + assert(dim && dim->dimid == var->dimids[d2]); + if (dim->unlimited) + { + size_t ulen; - /* We can't go beyond the largest current extent of - the unlimited dim. */ - if ((retval = NC4_inq_dim(ncid, dim->dimid, NULL, &ulen))) - BAIL(retval); + /* We can't go beyond the largest current extent of + the unlimited dim. */ + if ((retval = NC4_inq_dim(ncid, dim->dimid, NULL, &ulen))) + BAIL(retval); - /* Check for out of bound requests. */ - if (start[d2] >= (hssize_t)ulen && count[d2]) - BAIL_QUIET(NC_EINVALCOORDS); - if (start[d2] + count[d2] > ulen) - BAIL_QUIET(NC_EEDGE); + /* Check for out of bound requests. */ + if (start[d2] >= (hssize_t)ulen && count[d2]) + BAIL_QUIET(NC_EINVALCOORDS); + if (start[d2] + count[d2] > ulen) + BAIL_QUIET(NC_EEDGE); - /* Things get a little tricky here. If we're getting - a GET request beyond the end of this var's - current length in an unlimited dimension, we'll - later need to return the fill value for the - variable. */ - if (start[d2] >= (hssize_t)fdims[d2]) - fill_value_size[d2] = count[d2]; - else if (start[d2] + count[d2] > fdims[d2]) - fill_value_size[d2] = count[d2] - (fdims[d2] - start[d2]); - else - fill_value_size[d2] = 0; - count[d2] -= fill_value_size[d2]; - if (fill_value_size[d2]) - provide_fill++; - } - else - { - /* Check for out of bound requests. */ - if (start[d2] >= (hssize_t)fdims[d2]) - BAIL_QUIET(NC_EINVALCOORDS); - if (start[d2] + count[d2] > fdims[d2]) - BAIL_QUIET(NC_EEDGE); + /* Things get a little tricky here. If we're getting + a GET request beyond the end of this var's + current length in an unlimited dimension, we'll + later need to return the fill value for the + variable. */ + if (start[d2] >= (hssize_t)fdims[d2]) + fill_value_size[d2] = count[d2]; + else if (start[d2] + count[d2] > fdims[d2]) + fill_value_size[d2] = count[d2] - (fdims[d2] - start[d2]); + else + fill_value_size[d2] = 0; + count[d2] -= fill_value_size[d2]; + if (fill_value_size[d2]) + provide_fill++; + } + else + { + /* Check for out of bound requests. */ + if (start[d2] >= (hssize_t)fdims[d2]) + BAIL_QUIET(NC_EINVALCOORDS); + if (start[d2] + count[d2] > fdims[d2]) + BAIL_QUIET(NC_EEDGE); - /* Set the fill value boundary */ - fill_value_size[d2] = count[d2]; - } - } + /* Set the fill value boundary */ + fill_value_size[d2] = count[d2]; + } + } /* A little quirk: if any of the count values are zero, don't * read. */ @@ -1567,11 +1561,10 @@ var_create_dataset(NC_GRP_INFO_T *grp, NC_VAR_INFO_T *var, nc_bool_t write_dimid /* Check to see if any unlimited dimensions are used in this var. */ for (d = 0; d < var->ndims; d++) { - for (g = grp; g; g = g->parent) - for (dim = g->dim; dim; dim = dim->l.next) - if (dim->dimid == var->dimids[d]) - if (dim->unlimited) - unlimdim++; + dim = var->dim[d]; + assert(dim && dim->dimid == var->dimids[d]); + if (dim->unlimited) + unlimdim++; } /* If there are no unlimited dims, and no filters, and the user @@ -1592,47 +1585,37 @@ var_create_dataset(NC_GRP_INFO_T *grp, NC_VAR_INFO_T *var, nc_bool_t write_dimid /* Gather current & maximum dimension sizes, along with chunk sizes */ for (d = 0; d < var->ndims; d++) { - for (g = grp; g && (dims_found < var->ndims); g = g->parent) - { - for (dim = g->dim; dim; dim = dim->l.next) - { - if (dim->dimid == var->dimids[d]) - { - dimsize[d] = dim->unlimited ? NC_HDF5_UNLIMITED_DIMSIZE : dim->len; - maxdimsize[d] = dim->unlimited ? H5S_UNLIMITED : (hsize_t)dim->len; - if (!var->contiguous) { - if (var->chunksizes[d]) - chunksize[d] = var->chunksizes[d]; - else - { - size_t type_size; - if (var->type_info->nc_type_class == NC_STRING) - type_size = sizeof(char *); - else - type_size = var->type_info->size; + dim = var->dim[d]; + assert(dim && dim->dimid == var->dimids[d]); + dimsize[d] = dim->unlimited ? NC_HDF5_UNLIMITED_DIMSIZE : dim->len; + maxdimsize[d] = dim->unlimited ? H5S_UNLIMITED : (hsize_t)dim->len; + if (!var->contiguous) { + if (var->chunksizes[d]) + chunksize[d] = var->chunksizes[d]; + else + { + size_t type_size; + if (var->type_info->nc_type_class == NC_STRING) + type_size = sizeof(char *); + else + type_size = var->type_info->size; - /* Unlimited dim always gets chunksize of 1. */ - if (dim->unlimited) - chunksize[d] = 1; - else - chunksize[d] = pow((double)DEFAULT_CHUNK_SIZE/type_size, - 1/(double)(var->ndims - unlimdim)); + /* Unlimited dim always gets chunksize of 1. */ + if (dim->unlimited) + chunksize[d] = 1; + else + chunksize[d] = pow((double)DEFAULT_CHUNK_SIZE/type_size, + 1/(double)(var->ndims - unlimdim)); - /* If the chunksize is greater than the dim - * length, make it the dim length. */ - if (!dim->unlimited && chunksize[d] > dim->len) - chunksize[d] = dim->len; + /* If the chunksize is greater than the dim + * length, make it the dim length. */ + if (!dim->unlimited && chunksize[d] > dim->len) + chunksize[d] = dim->len; - /* Remember the computed chunksize */ - var->chunksizes[d] = chunksize[d]; - } - } - - dims_found++; - break; - } - } - } + /* Remember the computed chunksize */ + var->chunksizes[d] = chunksize[d]; + } + } } if (var->contiguous) @@ -2039,42 +2022,33 @@ attach_dimscales(NC_GRP_INFO_T *grp) { if (!var->dimscale_attached[d]) { - for (g = grp; g && !var->dimscale_attached[d]; g = g->parent) - for (dim1 = g->dim; dim1; dim1 = dim1->l.next) - if (var->dimids[d] == dim1->dimid) - { - hid_t dim_datasetid; /* Dataset ID for dimension */ + dim1 = var->dim[d]; + assert(dim1 && dim1->dimid == var->dimids[d]); + hid_t dim_datasetid; /* Dataset ID for dimension */ - LOG((2, "%s: attaching scale for dimid %d to var %s", - __func__, var->dimids[d], var->name)); + LOG((2, "%s: attaching scale for dimid %d to var %s", + __func__, var->dimids[d], var->name)); - /* Find dataset ID for dimension */ - if (dim1->coord_var) - dim_datasetid = dim1->coord_var->hdf_datasetid; - else - dim_datasetid = dim1->hdf_dimscaleid; - assert(dim_datasetid > 0); - if (H5DSattach_scale(var->hdf_datasetid, dim_datasetid, d) < 0) - BAIL(NC_EHDFERR); - var->dimscale_attached[d] = NC_TRUE; - break; - } + /* Find dataset ID for dimension */ + if (dim1->coord_var) + dim_datasetid = dim1->coord_var->hdf_datasetid; + else + dim_datasetid = dim1->hdf_dimscaleid; + assert(dim_datasetid > 0); + if (H5DSattach_scale(var->hdf_datasetid, dim_datasetid, d) < 0) + BAIL(NC_EHDFERR); + var->dimscale_attached[d] = NC_TRUE; } - /* If we didn't find a dimscale to attach, that's a problem! */ - if (!var->dimscale_attached[d]) - { - LOG((0, "no dimscale found!")); - return NC_EDIMSCALE; - } - } - else - { - /* Create a phoney dimension! */ - + /* If we didn't find a dimscale to attach, that's a problem! */ + if (!var->dimscale_attached[d]) + { + LOG((0, "no dimscale found!")); + return NC_EDIMSCALE; + } } } - } /* next d */ + } } exit: @@ -2223,39 +2197,26 @@ write_var(NC_VAR_INFO_T *var, NC_GRP_INFO_T *grp, nc_bool_t write_dimid) if (var->dimscale_attached) { - int dims_detached = 0; - nc_bool_t finished = NC_FALSE; int d; /* If this is a regular var, detach all its dim scales. */ for (d = 0; d < var->ndims; d++) if (var->dimscale_attached[d]) { - NC_GRP_INFO_T *g; + hid_t dim_datasetid; /* Dataset ID for dimension */ + NC_DIM_INFO_T *dim1 = var->dim[d]; + assert(dim1 && dim1->dimid == var->dimids[d]); - for (g = grp; g && !finished; g = g->parent) - { - NC_DIM_INFO_T *dim1; + /* Find dataset ID for dimension */ + if (dim1->coord_var) + dim_datasetid = dim1->coord_var->hdf_datasetid; + else + dim_datasetid = dim1->hdf_dimscaleid; + assert(dim_datasetid > 0); - for (dim1 = g->dim; dim1; dim1 = dim1->l.next) - if (var->dimids[d] == dim1->dimid) - { - hid_t dim_datasetid; /* Dataset ID for dimension */ - - /* Find dataset ID for dimension */ - if (dim1->coord_var) - dim_datasetid = dim1->coord_var->hdf_datasetid; - else - dim_datasetid = dim1->hdf_dimscaleid; - assert(dim_datasetid > 0); - - if (H5DSdetach_scale(var->hdf_datasetid, dim_datasetid, d) < 0) - BAIL(NC_EHDFERR); - var->dimscale_attached[d] = NC_FALSE; - if (dims_detached++ == var->ndims) - finished = NC_TRUE; - } - } + if (H5DSdetach_scale(var->hdf_datasetid, dim_datasetid, d) < 0) + BAIL(NC_EHDFERR); + var->dimscale_attached[d] = NC_FALSE; } } } @@ -2418,7 +2379,6 @@ write_dim(NC_DIM_INFO_T *dim, NC_GRP_INFO_T *grp, nc_bool_t write_dimid) if (v1) { hsize_t *new_size = NULL; - NC_GRP_INFO_T *g; NC_DIM_INFO_T *dim1; int d1; @@ -2428,21 +2388,8 @@ write_dim(NC_DIM_INFO_T *dim, NC_GRP_INFO_T *grp, nc_bool_t write_dimid) BAIL(NC_ENOMEM); for (d1 = 0; d1 < v1->ndims; d1++) { - if (v1->dimids[d1] == dim->dimid) - new_size[d1] = dim->len; - else - { - int break_it = 0; - - for (g = grp; g && !break_it; g = g->parent) - for (dim1 = g->dim; dim1; dim1 = dim1->l.next) - if (dim1->dimid == v1->dimids[d1]) - { - new_size[d1] = dim1->len; - break_it++; - break; - } - } + assert(v1->dim[d1] && v1->dim[d1]->dimid == v1->dimids[d1]); + new_size[d1] = v1->dim[d1]->len; } if (H5Dset_extent(v1->hdf_datasetid, new_size) < 0) { free(new_size); @@ -3631,6 +3578,17 @@ nc4_rec_match_dimscales(NC_GRP_INFO_T *grp) * try and find a dimension for them. */ for (var = grp->var; var; var = var->l.next) { + /* Check all vars and see if dim[i] != NULL if dimids[i] valid. */ + int ndims = var->ndims; + int d; + for (d = 0; d < ndims; d++) + { + if (var->dim[d] == NULL) { + nc4_find_dim(grp, var->dimids[d], &var->dim[d], NULL); + } + /* assert(var->dim[d] && var->dim[d]->dimid == var->dimids[d]); */ + } + /* Skip dimension scale variables */ if (!var->dimscale) { @@ -3658,6 +3616,7 @@ nc4_rec_match_dimscales(NC_GRP_INFO_T *grp) LOG((4, "%s: for dimension %d, found dim %s", __func__, d, dim->name)); var->dimids[d] = dim->dimid; + var->dim[d] = dim; finished = NC_TRUE; break; } @@ -3757,6 +3716,7 @@ nc4_rec_match_dimscales(NC_GRP_INFO_T *grp) /* The variable must remember the dimid. */ var->dimids[d] = dim->dimid; + var->dim[d] = dim; } /* next dim */ /* Free the memory we malloced. */ diff --git a/libsrc4/nc4var.c b/libsrc4/nc4var.c index 0cc9c4e3f..b47588c3e 100644 --- a/libsrc4/nc4var.c +++ b/libsrc4/nc4var.c @@ -919,8 +919,7 @@ nc_def_var_extra(int ncid, int varid, int *shuffle, int *deflate, if (!ishdf4) { for (d = 0; d < var->ndims; d++) { - if ((retval = nc4_find_dim(grp, var->dimids[d], &dim, NULL))) - return retval; + dim = var->dim[d]; if (dim->unlimited) return NC_EINVAL; }