Merge pull request #1026 from NetCDF-World-Domination-Council/ejh_lazy_atts_2

Lazy read of attributes
This commit is contained in:
Ward Fisher 2018-06-25 21:04:35 -06:00 committed by GitHub
commit b9d846836b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 319 additions and 51 deletions

View File

@ -180,9 +180,7 @@ typedef struct NC_VAR_INFO
nc_bool_t written_to; /* True if variable has data written to it */
struct NC_TYPE_INFO *type_info;
hid_t hdf_datasetid;
#if 0
int natts; /* Use explicit index because there may be gaps in numbers */
#endif
int atts_not_read; /* If true, the atts have not yet been read. */
NCindex* att; /* NCindex<NC_ATT_INFO_T*> */
nc_bool_t no_fill; /* True if no fill value is defined for var */
void *fill_value;
@ -281,6 +279,7 @@ typedef struct NC_GRP_INFO
hid_t hdf_grpid;
struct NC_HDF5_FILE_INFO *nc4_info;
struct NC_GRP_INFO *parent;
int atts_not_read;
NCindex* children; /* NCindex<struct NC_GRP_INFO*> */
NCindex* dim; /* NCindex<NC_DIM_INFO_T> * */
NCindex* att; /* NCindex<NC_ATT_INFO_T> * */
@ -370,6 +369,8 @@ int nc4_rec_write_groups_types(NC_GRP_INFO_T *grp);
int nc4_enddef_netcdf4_file(NC_HDF5_FILE_INFO_T *h5);
int nc4_reopen_dataset(NC_GRP_INFO_T *grp, NC_VAR_INFO_T *var);
int nc4_adjust_var_cache(NC_GRP_INFO_T *grp, NC_VAR_INFO_T * var);
int nc4_read_grp_atts(NC_GRP_INFO_T *grp);
int nc4_read_var_atts(NC_GRP_INFO_T *grp, NC_VAR_INFO_T *var);
/* The following functions manipulate the in-memory linked list of
metadata, without using HDF calls. */

View File

@ -23,24 +23,47 @@ int nc4typelen(nc_type type);
*
* @param grp Group
* @param varid Variable ID | NC_GLOBAL
* @param varp Pointer into which to return created NC_VAR_INFO_T instance
* @param varp Pointer that gets pointer to NC_VAR_INFO_T
* instance. Ignored if NULL.
* @param attlist Pointer that gets pointer to attribute list.
*
* @return Attribute list | NULL
* @author Dennis Heimbigner
* @return NC_NOERR No error.
* @author Dennis Heimbigner, Ed Hartnett
*/
static NCindex *
getattlist(NC_GRP_INFO_T *grp, int varid, NC_VAR_INFO_T **varp)
static int
getattlist(NC_GRP_INFO_T *grp, int varid, NC_VAR_INFO_T **varp,
NCindex **attlist)
{
if (varid == NC_GLOBAL) {
if(varp) *varp = NULL;
return grp->att;
} else {
NC_VAR_INFO_T* var = (NC_VAR_INFO_T*)ncindexith(grp->vars,varid);
if (!var) return NULL;
assert(var->hdr.id == varid);
if(varp) *varp = var;
return var->att;
NC_VAR_INFO_T* var;
int retval;
if (varid == NC_GLOBAL)
{
/* Do we need to read the atts? */
if (grp->atts_not_read)
if ((retval = nc4_read_grp_atts(grp)))
return retval;
if (varp)
*varp = NULL;
*attlist = grp->att;
}
else
{
if (!(var = (NC_VAR_INFO_T *)ncindexith(grp->vars, varid)))
return NC_ENOTVAR;
assert(var->hdr.id == varid);
/* Do we need to read the atts? */
if (var->atts_not_read)
if ((retval = nc4_read_var_atts(grp, var)))
return retval;
if (varp)
*varp = var;
*attlist = var->att;
}
return NC_NOERR;
}
/**
@ -92,11 +115,11 @@ NC4_rename_att(int ncid, int varid, const char *name, const char *newname)
if ((retval = nc4_check_name(newname, norm_newname)))
return retval;
/* Is new name in use? */
list = getattlist(grp,varid,&var);
if(list == NULL)
return NC_ENOTVAR;
/* Get the list of attributes. */
if ((retval = getattlist(grp, varid, &var, &list)))
return retval;
/* Is new name in use? */
att = (NC_ATT_INFO_T*)ncindexlookup(list,norm_newname);
if(att != NULL)
return NC_ENAMEINUSE;
@ -175,9 +198,9 @@ NC4_del_att(int ncid, int varid, const char *name)
NC_ATT_INFO_T *att;
NCindex* attlist = NULL;
hid_t locid = 0, datasetid = 0;
int retval = NC_NOERR;
int i;
size_t deletedid;
int retval;
if (!name)
return NC_EINVAL;
@ -204,11 +227,11 @@ NC4_del_att(int ncid, int varid, const char *name)
BAIL(retval);
}
/* Get either the global or a variable attribute list. Also figure
out the HDF5 location it's attached to. */
attlist = getattlist(grp,varid,&var);
if(attlist == NULL)
return NC_ENOTVAR;
/* Get either the global or a variable attribute list. */
if ((retval = getattlist(grp, varid, &var, &attlist)))
return retval;
/* Determine the location id in the HDF5 file. */
if (varid == NC_GLOBAL)
locid = grp->hdf_grpid;
else if (var->created)
@ -297,9 +320,8 @@ NC4_put_att(int ncid, int varid, const char *name, nc_type file_type,
/* Find att, if it exists. (Must check varid first or nc_test will
* break.) */
attlist = getattlist(grp,varid,&var);
if(attlist == NULL)
return NC_ENOTVAR;
if ((ret = getattlist(grp, varid, &var, &attlist)))
return ret;
/* The length needs to be positive (cast needed for braindead
systems with signed size_t). */

View File

@ -1800,6 +1800,39 @@ read_type(NC_GRP_INFO_T *grp, hid_t hdf_typeid, char *type_name)
return retval;
}
/**
 * @internal Iterate over every HDF5 attribute attached to a variable's
 * dataset, handing each one to att_read_var_callbk, and then clear the
 * variable's "attributes not yet read" flag.
 *
 * NOTE(review): the original comment says attributes holding HDF5
 * dimension scale information are ignored; that filtering would live
 * in att_read_var_callbk, which is not visible here - confirm.
 *
 * @param grp Pointer to the group info (must not be NULL).
 * @param var Pointer to the var info (must not be NULL).
 *
 * @return ::NC_NOERR No error.
 * @return ::NC_EATTMETA The HDF5 attribute iteration reported failure.
 * @author Ed Hartnett
 */
int
nc4_read_var_atts(NC_GRP_INFO_T *grp, NC_VAR_INFO_T *var)
{
    att_iter_info iter_data; /* Context handed to the iteration callback */
    int iter_status;         /* Result of the HDF5 iteration */

    /* Both pointers are required. */
    assert(grp && var);

    /* The callback needs to know which group/var it is working on. */
    iter_data.grp = grp;
    iter_data.var = var;

    /* Walk the dataset's attributes in creation order. */
    iter_status = H5Aiterate2(var->hdf_datasetid, H5_INDEX_CRT_ORDER,
                              H5_ITER_INC, NULL, att_read_var_callbk,
                              &iter_data);
    if (iter_status < 0)
        return NC_EATTMETA;

    /* The flag is only cleared after a successful iteration, so a
     * failed read leaves the var marked as not yet read. */
    var->atts_not_read = 0;

    return NC_NOERR;
}
/**
* @internal This function is called by read_dataset(), (which is called
* by nc4_rec_read_metadata()) when a netCDF variable is found in the
@ -1825,7 +1858,6 @@ read_var(NC_GRP_INFO_T *grp, hid_t datasetid, const char *obj_name,
hid_t access_pid = 0;
int incr_id_rc = 0; /* Whether the dataset ID's ref count has been incremented */
int d;
att_iter_info att_info; /* Custom iteration information */
H5Z_filter_t filter;
int num_filters;
unsigned int cd_values_zip[CD_NELEMS_ZLIB];
@ -2040,15 +2072,10 @@ read_var(NC_GRP_INFO_T *grp, hid_t datasetid, const char *obj_name,
}
}
/* Now read all the attributes of this variable, ignoring the
ones that hold HDF5 dimension scale information. */
att_info.var = var;
att_info.grp = grp;
if ((H5Aiterate2(var->hdf_datasetid, H5_INDEX_CRT_ORDER, H5_ITER_INC, NULL,
att_read_var_callbk, &att_info)) < 0)
BAIL(NC_EATTMETA);
/* Read variable attributes. */
var->atts_not_read = 1;
/* if ((retval = nc4_read_var_atts(grp, var))) */
/* BAIL(retval); */
/* Is this a deflated variable with a chunksize greater than the
* current cache size? */
@ -2083,8 +2110,8 @@ exit:
* @return ::NC_EHDFERR HDF5 returned error.
* @author Ed Hartnett
*/
static int
read_grp_atts(NC_GRP_INFO_T *grp)
int
nc4_read_grp_atts(NC_GRP_INFO_T *grp)
{
hid_t attid = -1;
hsize_t num_obj, i;
@ -2138,6 +2165,9 @@ read_grp_atts(NC_GRP_INFO_T *grp)
attid = -1;
}
/* Remember that we have read the atts for this group. */
grp->atts_not_read = 0;
exit:
if (attid > 0) {
if(H5Aclose(attid) < 0)
@ -2435,9 +2465,10 @@ nc4_rec_read_metadata(NC_GRP_INFO_T *grp)
BAIL(NC_EHDFERR);
}
/* Scan the group for global (i.e. group-level) attributes. */
if ((retval = read_grp_atts(grp)))
BAIL(retval);
/* Defer the reading of global atts until someone asks for one. */
grp->atts_not_read = 1;
/* if ((retval = nc4_read_grp_atts(grp))) */
/* return retval; */
/* when exiting define mode, mark all variable written */
for (i=0; i<ncindexsize(grp->vars); i++) {
@ -2465,6 +2496,33 @@ exit:
return retval;
}
/**
 * @internal Report whether the root group carries the attribute named
 * NC3_STRICT_ATT_NAME, which marks the file as using the netCDF
 * classic model. Only the existence of the attribute is tested; it is
 * not read.
 *
 * @param root_grp Pointer to the group info for the root group of the
 * file (its parent pointer must be NULL).
 * @param is_classic Pointer that gets 1 if the attribute exists, 0
 * otherwise. Must not be NULL.
 *
 * @return ::NC_NOERR No error.
 * @return ::NC_EHDFERR The HDF5 existence check failed.
 * @author Ed Hartnett
 */
static int
check_for_classic_model(NC_GRP_INFO_T *root_grp, int *is_classic)
{
    htri_t found;

    /* Must be called on the root group, with somewhere to put the answer. */
    assert(!root_grp->parent && is_classic);

    /* A negative result means HDF5 could not perform the check. */
    found = H5Aexists(root_grp->hdf_grpid, NC3_STRICT_ATT_NAME);
    if (found < 0)
        return NC_EHDFERR;

    /* Presence of the attribute means classic model is in effect. */
    if (found)
        *is_classic = 1;
    else
        *is_classic = 0;

    return NC_NOERR;
}
/**
* @internal Open a netcdf-4 file. Things have already been kicked off
* in ncfunc.c in nc_open, but here the netCDF-4 part of opening a
@ -2485,6 +2543,7 @@ nc4_open_file(const char *path, int mode, void* parameters, NC *nc)
int retval;
unsigned flags;
NC_HDF5_FILE_INFO_T *nc4_info = NULL;
int is_classic;
#ifdef USE_PARALLEL4
NC_MPI_INFO* mpiinfo = NULL;
@ -2612,6 +2671,12 @@ nc4_open_file(const char *path, int mode, void* parameters, NC *nc)
if ((retval = nc4_rec_read_metadata(nc4_info->root_grp)))
BAIL(retval);
/* Check for classic model attribute. */
if ((retval = check_for_classic_model(nc4_info->root_grp, &is_classic)))
BAIL(retval);
if (is_classic)
nc4_info->cmode |= NC_CLASSIC_MODEL;
/* Now figure out which netCDF dims are indicated by the dimscale
* information. */
if ((retval = nc4_rec_match_dimscales(nc4_info->root_grp)))
@ -3016,6 +3081,11 @@ NC4_inq(int ncid, int *ndimsp, int *nvarsp, int *nattsp, int *unlimdimidp)
}
if (nattsp)
{
/* Do we need to read the atts? */
if (grp->atts_not_read)
if ((retval = nc4_read_grp_atts(grp)))
return retval;
*nattsp = ncindexcount(grp->att);
}

View File

@ -116,6 +116,7 @@ nc4_get_att(int ncid, int varid, const char *name, nc_type *xtype,
NC_GRP_INFO_T *grp;
NC_HDF5_FILE_INFO_T *h5;
NC_ATT_INFO_T *att = NULL;
NC_VAR_INFO_T *var;
int my_attnum = -1;
int need_to_convert = 0;
int range_error = NC_NOERR;
@ -136,9 +137,9 @@ nc4_get_att(int ncid, int varid, const char *name, nc_type *xtype,
return retval;
/* Check varid */
if (varid != NC_GLOBAL) {
NC_VAR_INFO_T* var = (NC_VAR_INFO_T*)ncindexith(grp->vars,varid);
if(var == NULL)
if (varid != NC_GLOBAL)
{
if (!(var = (NC_VAR_INFO_T*)ncindexith(grp->vars,varid)))
return NC_ENOTVAR;
assert(var->hdr.id == varid);
}
@ -150,6 +151,20 @@ nc4_get_att(int ncid, int varid, const char *name, nc_type *xtype,
if ((retval = nc4_normalize_name(name, norm_name)))
BAIL(retval);
/* Read the atts for this group/var, if they have not been read. */
if (varid == NC_GLOBAL)
{
if (grp->atts_not_read)
if ((retval = nc4_read_grp_atts(grp)))
return retval;
}
else
{
if (var->atts_not_read)
if ((retval = nc4_read_var_atts(grp, var)))
return retval;
}
/* If this is one of the reserved atts, use nc_get_att_special. */
if (nc->ext_ncid == ncid && varid == NC_GLOBAL) {
const NC_reservedatt* ra = NC_findreserved(norm_name);

View File

@ -460,6 +460,7 @@ nc4_find_grp_att(NC_GRP_INFO_T *grp, int varid, const char *name, int attnum,
{
NC_VAR_INFO_T *var;
NCindex* attlist = NULL;
int retval;
assert(grp && grp->hdr.name);
LOG((4, "nc4_find_grp_att: grp->name %s varid %d name %s attnum %d",
@ -467,11 +468,24 @@ nc4_find_grp_att(NC_GRP_INFO_T *grp, int varid, const char *name, int attnum,
/* Get either the global or a variable attribute list. */
if (varid == NC_GLOBAL)
{
attlist = grp->att;
/* Do we need to read the atts? */
if (grp->atts_not_read)
if ((retval = nc4_read_grp_atts(grp)))
return retval;
}
else
{
var = (NC_VAR_INFO_T*)ncindexith(grp->vars,varid);
if (!var) return NC_ENOTVAR;
/* Do we need to read the var attributes? */
if (var->atts_not_read)
if ((retval = nc4_read_var_atts(grp, var)))
return retval;
attlist = var->att;
assert(var->hdr.id == varid);
}

View File

@ -752,6 +752,11 @@ NC4_inq_var_all(int ncid, int varid, char *name, nc_type *xtypep,
{
if (nattsp)
{
/* Do we need to read the atts? */
if (grp->atts_not_read)
if ((retval = nc4_read_grp_atts(grp)))
return retval;
*nattsp = ncindexcount(grp->att);
}
return NC_NOERR;
@ -775,6 +780,9 @@ NC4_inq_var_all(int ncid, int varid, char *name, nc_type *xtypep,
dimidsp[d] = var->dimids[d];
if (nattsp)
{
if (var->atts_not_read)
if ((retval = nc4_read_var_atts(grp, var)))
return retval;
*nattsp = ncindexcount(var->att);
}

View File

@ -121,8 +121,8 @@ TESTS += run_par_test.sh
endif
if ENABLE_METADATA_PERF
check_PROGRAMS += bigmeta openbigmeta
TESTS += perftest.sh
check_PROGRAMS += bigmeta openbigmeta tst_attsperf
TESTS += tst_attsperf perftest.sh
endif
EXTRA_DIST = run_par_test.sh run_bm.sh run_bm_test1.sh \

0
nc_test4/perftest.sh Normal file → Executable file
View File

138
nc_test4/tst_attsperf.c Normal file
View File

@ -0,0 +1,138 @@
/* This is part of the netCDF package. Copyright 2018 University
* Corporation for Atmospheric Research/Unidata. See COPYRIGHT file
* for conditions of use.
*
* Test the netCDF-4 attribute code.
*
* WARNING: do not attempt to run this under windows because of the use
* of gettimeofday().
*
* Ed Hartnett 6/19/18
*/
#include <config.h>
#include <nc_tests.h>
#include "err_macros.h"
#include "nc4internal.h"
#include <sys/time.h>
#define TEST "tst_attsperf"
#define VAR "bigvar"
#define NDIMS 2
#define DIM0 "d0"
#define DIM1 "d1"
#define DIMSIZE0 16
#define DIMSIZE1 512
#define TOTALSIZE (DIMSIZE0 * DIMSIZE1)
#define NUM_ATTS 100
#define ATT_LEN 10
#define NUM_VARS 100
/**
 * Attach NUM_ATTS attributes of type NC_DOUBLE to a variable (or to the
 * group, when varid is NC_GLOBAL). Every attribute holds the same
 * ATT_LEN values 0, 1, ..., ATT_LEN - 1; the attribute names embed the
 * test name, the varid and the attribute number.
 *
 * @param ncid ID of the file being written.
 * @param varid Variable ID, or NC_GLOBAL.
 *
 * @return 0 when the end of the function is reached. Failures are
 * reported through the ERR macro from err_macros.h.
 */
int
add_attributes(int ncid, int varid)
{
    double values[ATT_LEN];
    char name_buf[NC_MAX_NAME + 1];
    int idx;
    int attnum = 0;

    /* Every attribute gets the same ramp of values. */
    for (idx = 0; idx < ATT_LEN; idx++)
        values[idx] = idx;

    /* Write the attributes one at a time, each under a unique name. */
    while (attnum < NUM_ATTS)
    {
        sprintf(name_buf, "%s_varid_%d_att_%d", TEST, varid, attnum);
        if (nc_put_att_double(ncid, varid, name_buf, NC_DOUBLE,
                              ATT_LEN, values)) ERR;
        attnum++;
    }

    return 0;
}
/**
 * Create a netCDF-4 test file named "<TEST>_<file_no>.nc" containing
 * two dimensions, NUM_VARS integer variables with NUM_ATTS attributes
 * each, and NUM_ATTS global attributes.
 *
 * @param file_no Number used to build the file name.
 *
 * @return 0 when the end of the function is reached. Failures are
 * reported through the ERR macro from err_macros.h.
 */
int
buildfile(int file_no)
{
    int ncid, varid;
    int dimids[NDIMS];
    char file_name[NC_MAX_NAME + 1];
    int v;

    snprintf(file_name, sizeof file_name, "%s_%d.nc", TEST, file_no);

    if (nc_create(file_name, NC_NETCDF4, &ncid)) ERR;
    if (nc_def_dim(ncid, DIM0, DIMSIZE0, &dimids[0])) ERR;
    if (nc_def_dim(ncid, DIM1, DIMSIZE1, &dimids[1])) ERR;

    for (v = 0; v < NUM_VARS; v++)
    {
        char var_name[NC_MAX_NAME + 1];

        snprintf(var_name, sizeof var_name, "%s_var_%d", TEST, v);
        if (nc_def_var(ncid, var_name, NC_INT, NDIMS, dimids, &varid)) ERR;

        /* Attach the attributes to the ID that nc_def_var() handed
         * back, rather than assuming it equals the loop counter. */
        if (add_attributes(ncid, varid)) ERR;
    }

    /* Global attributes too. */
    if (add_attributes(ncid, NC_GLOBAL)) ERR;

    if (nc_enddef(ncid)) ERR;
    if (nc_close(ncid)) ERR;

    return 0;
}
/**
 * Open the test file "<TEST>_<inq_all>.nc" and measure, with
 * gettimeofday(), how long the open takes. When inq_all is non-zero the
 * timed region also asks for the number of global attributes and the
 * number of attributes of each of the NUM_VARS variables, which
 * triggers the attribute reads.
 *
 * @param inq_all Selects the file to open and, if non-zero, makes the
 * timed region also query every attribute count.
 *
 * @return Elapsed wall-clock time in microseconds.
 *
 * NOTE(review): failures go through the ERR macro from err_macros.h,
 * whose definition is not visible here; main() compares the result
 * with -1 - confirm that matches what ERR actually returns.
 */
long long
readfile(int inq_all)
{
    int ncid;
    struct timeval starttime, endtime;
    long long startt, endt;
    char file_name[NC_MAX_NAME + 1];

    snprintf(file_name, sizeof file_name, "%s_%d.nc", TEST, inq_all);

    /* Start the clock. */
    gettimeofday(&starttime, NULL);

    /* Open the file. */
    if (nc_open(file_name, NC_NETCDF4, &ncid)) ERR;

    /* Simulate old open by triggering attribute reads, if desired. */
    if (inq_all)
    {
        int natts;
        int v;

        /* When checking the number of atts, we trigger the read. */
        if (nc_inq(ncid, NULL, NULL, &natts, NULL)) ERR;
        for (v = 0; v < NUM_VARS; v++)
            if (nc_inq_varnatts(ncid, v, &natts)) ERR;
    }

    /* Stop the clock before closing, so close time is not measured. */
    gettimeofday(&endtime, NULL);

    /* Close the file. */
    if (nc_close(ncid)) ERR;

    /* Convert to microseconds. The LL suffix forces the multiply to be
     * done in long long; with a 32-bit time_t the product of the
     * seconds value and one million would otherwise overflow. */
    startt = 1000000LL * starttime.tv_sec + starttime.tv_usec;
    endt = 1000000LL * endtime.tv_sec + endtime.tv_usec;

    return endt - startt;
}
/**
 * Build two test files, time opening file 0 without touching its
 * attributes and file 1 while querying every attribute count, and
 * print both times plus their integer ratio.
 *
 * @param argc Unused.
 * @param argv Unused.
 */
int
main(int argc, char **argv)
{
    long long zerodelta, onedelta, factor;

    printf("testing speed of open with files with lots of metadata...\n");
    if (buildfile(0)) ERR;
    if (buildfile(1)) ERR;
    if ((zerodelta = readfile(0)) == -1) ERR;
    if ((onedelta = readfile(1)) == -1) ERR;

    /* Both deltas are in microseconds. The lazy open can measure as
     * zero (or the clock can step), so guard the division; a factor of
     * 0 then means "could not be computed". */
    factor = zerodelta > 0 ? onedelta / zerodelta : 0;

    printf("Lazy Atts time=%lld Read Atts at Open time=%lld Speedup=%lld\n",
           zerodelta, onedelta, factor);
    SUMMARIZE_ERR;
    FINAL_RESULTS;
}