Re-implement the nc_get/put_vars operations for netcdf-4 using the
corresponding HDF5 operations.

re: github issue https://github.com/Unidata/netcdf-c/issues/908
also in reference to https://github.com/pydata/xarray/issues/2004

The netcdf-c library has implemented the nc_get_vars and nc_put_vars
operations one element at a time, which has resulted in very slow
performance.

This PR improves the situation for netcdf-4/HDF5 files by using the
hyperslab (slab) operations provided by the HDF5 library. The new
implementation passes the get/put vars stride information down to the
HDF5 hyperslab selection, as the sketch below illustrates.
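
The following is a minimal sketch of the idea, not the library's own code:
the stride array that arrives through nc_get_vars is handed directly to
H5Sselect_hyperslab, so HDF5 gathers every selected element in a single read
instead of issuing one I/O call per element. The dataset handle, rank, and
the assumption of a native-int element type are illustrative only.

#include <hdf5.h>

/* Sketch: read a strided selection with one HDF5 call.
 * 'dset' is an already-open dataset of native ints; error handling
 * is reduced to a single failure return. */
static int
strided_read_sketch(hid_t dset, int ndims, const hsize_t *start,
                    const hsize_t *stride, const hsize_t *count, int *buf)
{
    int ret = -1;
    hid_t fspace = H5Dget_space(dset);                     /* file dataspace */
    hid_t mspace = H5Screate_simple(ndims, count, NULL);   /* contiguous memory space */
    if (fspace < 0 || mspace < 0)
        goto done;
    /* Select the strided slab in the file; this is where the
     * nc_get_vars stride information ends up. */
    if (H5Sselect_hyperslab(fspace, H5S_SELECT_SET, start, stride, count, NULL) < 0)
        goto done;
    /* One read pulls in every selected element. */
    if (H5Dread(dset, H5T_NATIVE_INT, mspace, fspace, H5P_DEFAULT, buf) < 0)
        goto done;
    ret = 0;
done:
    if (mspace >= 0) H5Sclose(mspace);
    if (fspace >= 0) H5Sclose(fspace);
    return ret;
}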

The result appears to be a significant performance improvement. Simple
tests on large 2-D arrays show speedups in excess of 150x.

Misc. other changes:
1. Fix a bug in ncgen/semantics.c that iterated over a list's allocated
   length instead of its actual length.
2. Added a temporary hook in the netcdf library plus a performance
   test case (tst_varsperf.c) to estimate the speedup; a condensed
   usage sketch follows this list. After users have had some
   experience with this, I will remove the hook, probably after the
   4.7 release.
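
Condensed, hypothetical use of the temporary hook. The real test,
nc_test4/tst_varsperf.c (included below), also creates the file, times the two
paths, and verifies the data it reads back; the "bigvars.nc"/"bigvar" names and
the 1024x1024 int variable follow that test and are assumed to exist already.

#include <netcdf.h>

extern void nc4_set_default_vars(int);   /* temporary hook */

/* Strided read of one quadrant-sized selection (every other element). */
static void
strided_read(const char *path, const char *varname)
{
    int ncid, varid;
    static int buf[512 * 512];
    size_t start[2] = {0, 0};
    size_t count[2] = {512, 512};
    ptrdiff_t stride[2] = {2, 2};
    nc_open(path, NC_NETCDF4, &ncid);
    nc_inq_varid(ncid, varname, &varid);
    nc_get_vars_int(ncid, varid, start, count, stride, buf);
    nc_close(ncid);
}

int
main(void)
{
    nc4_set_default_vars(1);                 /* old element-at-a-time path */
    strided_read("bigvars.nc", "bigvar");    /* time this call ...         */
    nc4_set_default_vars(0);                 /* new HDF5 hyperslab path    */
    strided_read("bigvars.nc", "bigvar");    /* ... and this one; compare  */
    return 0;
}
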
Dennis Heimbigner 2018-05-22 16:50:52 -06:00
parent 2a9eaffd67
commit ee509ff4f3
11 changed files with 1043 additions and 9 deletions

View File

@ -7,6 +7,7 @@ This file contains a high-level description of this package's evolution. Release
## 4.7.0 - TBD
* [Enhancement] Improved the performance of the nc_get/put_vars operations by using the equivalent slab capabilities of hdf5. Result is a significant speedup of these operations. See [GitHub #1001](https://github.com/Unidata/netcdf-c/pull/1001) for more information.
* [Enhancement] Expanded the capabilities of `NC_INMEMORY` to support writing and accessing the final modified memory. See [GitHub #879](https://github.com/Unidata/netcdf-c/pull/879) for more information.
* [Enhancement] Made CDF5 support enabled by default. See [Github #931](https://github.com/Unidata/netcdf-c/issues/931) for more information.
* [Bug Fix] Corrected a number of memory issues identified in `ncgen`. See [GitHub #558 for more information](https://github.com/Unidata/netcdf-c/pull/558).

View File

@ -147,6 +147,16 @@ NC4_get_vara(int ncid, int varid,
const size_t *start, const size_t *count,
void *value, nc_type);
extern int
NC4_put_vars(int ncid, int varid,
const size_t *start, const size_t *count, const ptrdiff_t* stride,
const void *value, nc_type);
extern int
NC4_get_vars(int ncid, int varid,
const size_t *start, const size_t *count, const ptrdiff_t* stride,
void *value, nc_type);
/* End _var */
/* netCDF4 API only */

View File

@ -360,6 +360,12 @@ int nc4_put_vara(NC *nc, int ncid, int varid, const size_t *startp,
const size_t *countp, nc_type xtype, int is_long, void *op);
int nc4_get_vara(NC *nc, int ncid, int varid, const size_t *startp,
const size_t *countp, nc_type xtype, int is_long, void *op);
int nc4_put_vars(NC *nc, int ncid, int varid, const size_t *startp,
const size_t *countp, const ptrdiff_t* stridep,
nc_type xtype, int is_long, void *op);
int nc4_get_vars(NC *nc, int ncid, int varid, const size_t *startp,
const size_t *countp, const ptrdiff_t* stridep,
nc_type xtype, int is_long, void *op);
int nc4_rec_match_dimscales(NC_GRP_INFO_T *grp);
int nc4_rec_detect_need_to_preserve_dimids(NC_GRP_INFO_T *grp, nc_bool_t *bad_coord_orderp);
int nc4_rec_write_metadata(NC_GRP_INFO_T *grp, nc_bool_t bad_coord_order);

View File

@ -4530,3 +4530,784 @@ NC4_walk(hid_t gid, int* countp)
}
return ncstat;
}
/**
* @internal Write a strided array of data to a variable.
*
* @param nc Pointer to the file NC struct.
* @param ncid File ID.
* @param varid Variable ID.
* @param startp Array of start indices.
* @param countp Array of counts.
* @param stridep Array of strides.
* @param mem_nc_type The type of the data in memory.
* @param is_long True only if NC_LONG is the memory type.
* @param data The data to be written.
*
* @returns ::NC_NOERR No error.
* @returns ::NC_EBADID Bad ncid.
* @returns ::NC_ENOTVAR Var not found.
* @returns ::NC_EHDFERR HDF5 function returned error.
* @returns ::NC_EINVALCOORDS Incorrect start.
* @returns ::NC_EEDGE Incorrect start/count.
* @returns ::NC_ENOMEM Out of memory.
* @returns ::NC_EMPI MPI library error (parallel only)
* @returns ::NC_ECANTEXTEND Can't extend dimension for write.
* @returns ::NC_ERANGE Data conversion error.
* @author Ed Hartnett
*/
int
nc4_put_vars(NC *nc, int ncid, int varid, const size_t *startp,
const size_t *countp, const ptrdiff_t* stridep,
nc_type mem_nc_type, int is_long, void *data)
{
NC_GRP_INFO_T *grp;
NC_HDF5_FILE_INFO_T *h5;
NC_VAR_INFO_T *var;
NC_DIM_INFO_T *dim;
hid_t file_spaceid = 0, mem_spaceid = 0, xfer_plistid = 0;
long long unsigned xtend_size[NC_MAX_VAR_DIMS];
hsize_t fdims[NC_MAX_VAR_DIMS], fmaxdims[NC_MAX_VAR_DIMS];
hsize_t start[NC_MAX_VAR_DIMS], count[NC_MAX_VAR_DIMS];
hssize_t stride[NC_MAX_VAR_DIMS];
char *name_to_use;
int need_to_extend = 0;
#ifdef USE_PARALLEL4
int extend_possible = 0;
#endif
int retval = NC_NOERR, range_error = 0, i, d2;
void *bufr = NULL;
#ifndef HDF5_CONVERT
int need_to_convert = 0;
size_t len = 1;
#endif
#ifdef HDF5_CONVERT
hid_t mem_typeid = 0;
#endif
/* Find our metadata for this file, group, and var. */
assert(nc);
if ((retval = nc4_find_g_var_nc(nc, ncid, varid, &grp, &var)))
return retval;
h5 = NC4_DATA(nc);
assert(grp && h5 && var && var->hdr.name);
LOG((3, "%s: var->hdr.name %s mem_nc_type %d is_long %d",
__func__, var->hdr.name, mem_nc_type, is_long));
/* Check some stuff about the type and the file. If the file must
* be switched from define mode, it happens here. */
if ((retval = check_for_vara(&mem_nc_type, var, h5)))
return retval;
/* Convert from size_t and ptrdiff_t to hssize_t, and hsize_t. */
/* Also do sanity checks */
for (i = 0; i < var->ndims; i++)
{
start[i] = (startp == NULL ? 0 : startp[i]);
count[i] = (countp == NULL ? 1 : countp[i]);
stride[i] = (stridep == NULL ? 1 : stridep[i]);
/* Check for non-positive stride */
if(stride[i] <= 0)
return NC_ESTRIDE;
}
/* Open this dataset if necessary, also checking for a weird case:
* a non-coordinate (and non-scalar) variable that has the same
* name as a dimension. */
if (var->hdf5_name && strlen(var->hdf5_name) >= strlen(NON_COORD_PREPEND) &&
strncmp(var->hdf5_name, NON_COORD_PREPEND, strlen(NON_COORD_PREPEND)) == 0 &&
var->ndims)
name_to_use = var->hdf5_name;
else
name_to_use = var->hdr.name;
if (!var->hdf_datasetid)
if ((var->hdf_datasetid = H5Dopen2(grp->hdf_grpid, name_to_use, H5P_DEFAULT)) < 0)
return NC_ENOTVAR;
/* Get file space of data. */
if ((file_spaceid = H5Dget_space(var->hdf_datasetid)) < 0)
BAIL(NC_EHDFERR);
/* Check to ensure the user selection is
* valid. H5Sget_simple_extent_dims gets the sizes of all the dims
* and put them in fdims. */
if (H5Sget_simple_extent_dims(file_spaceid, fdims, fmaxdims) < 0)
BAIL(NC_EHDFERR);
#ifdef LOGGING
log_dim_info(var, fdims, fmaxdims, start, count);
#endif
/* Check dimension bounds. Remember that unlimited dimensions can
* put data beyond their current length. */
for (d2 = 0; d2 < var->ndims; d2++)
{
hsize_t endindex = start[d2] + (stride[d2]*(count[d2]-1)); /* last index written */
dim = var->dim[d2];
assert(dim && dim->hdr.id == var->dimids[d2]);
if(count[d2] == 0)
endindex = start[d2]; /* fixup for zero read count */
if (!dim->unlimited)
{
#ifdef RELAX_COORD_BOUND
if (start[d2] > (hssize_t)fdims[d2] ||
(start[d2] == (hssize_t)fdims[d2] && count[d2] > 0))
#else
if (start[d2] >= (hssize_t)fdims[d2])
#endif
BAIL_QUIET(NC_EINVALCOORDS);
if (endindex >= fdims[d2])
BAIL_QUIET(NC_EEDGE);
}
}
/* Now you would think that no one would be crazy enough to write
a scalar dataspace with one of the array function calls, but you
would be wrong. So let's check to see if the dataset is
scalar. If it is, we won't try to set up a hyperslab. */
if (H5Sget_simple_extent_type(file_spaceid) == H5S_SCALAR)
{
if ((mem_spaceid = H5Screate(H5S_SCALAR)) < 0)
BAIL(NC_EHDFERR);
}
else
{
if (H5Sselect_hyperslab(file_spaceid, H5S_SELECT_SET, start, stride,
count, NULL) < 0)
BAIL(NC_EHDFERR);
/* Create a space for the memory, just big enough to hold the slab
we want. */
if ((mem_spaceid = H5Screate_simple(var->ndims, count, NULL)) < 0)
BAIL(NC_EHDFERR);
}
#ifndef HDF5_CONVERT
/* Are we going to convert any data? (No converting of compound or
* opaque types.) */
if ((mem_nc_type != var->type_info->hdr.id || (var->type_info->hdr.id == NC_INT && is_long)) &&
mem_nc_type != NC_COMPOUND && mem_nc_type != NC_OPAQUE)
{
size_t file_type_size;
/* We must convert - allocate a buffer. */
need_to_convert++;
if (var->ndims)
for (d2=0; d2<var->ndims; d2++)
len *= countp[d2];
LOG((4, "converting data for var %s type=%d len=%d", var->hdr.name,
var->type_info->hdr.id, len));
/* Later on, we will need to know the size of this type in the
* file. */
assert(var->type_info->size);
file_type_size = var->type_info->size;
/* If we're reading, we need bufr to have enough memory to store
* the data in the file. If we're writing, we need bufr to be
* big enough to hold all the data in the file's type. */
if(len > 0)
if (!(bufr = malloc(len * file_type_size)))
BAIL(NC_ENOMEM);
}
else
#endif /* ifndef HDF5_CONVERT */
bufr = data;
#ifdef HDF5_CONVERT
/* Get the HDF type of the data in memory. */
if ((retval = nc4_get_hdf_typeid(h5, mem_nc_type, &mem_typeid,
var->type_info->endianness)))
BAIL(retval);
#endif
/* Create the data transfer property list. */
if ((xfer_plistid = H5Pcreate(H5P_DATASET_XFER)) < 0)
BAIL(NC_EHDFERR);
/* Apply the callback function which will detect range
* errors. Which one to call depends on the length of the
* destination buffer type. */
#ifdef HDF5_CONVERT
if (H5Pset_type_conv_cb(xfer_plistid, except_func, &range_error) < 0)
BAIL(NC_EHDFERR);
#endif
#ifdef USE_PARALLEL4
/* Set up parallel I/O, if needed. */
if ((retval = set_par_access(h5, var, xfer_plistid)))
BAIL(retval);
#endif
/* Read this hyperslab from memory. */
/* Does the dataset have to be extended? If it's already
extended to the required size, it will do no harm to reextend
it to that size. */
if (var->ndims)
{
for (d2 = 0; d2 < var->ndims; d2++)
{
hsize_t endindex = start[d2] + (stride[d2]*(count[d2]-1)); /* last index written */
if(count[d2] == 0)
endindex = start[d2];
dim = var->dim[d2];
assert(dim && dim->hdr.id == var->dimids[d2]);
if (dim->unlimited)
{
#ifdef USE_PARALLEL4
extend_possible = 1;
#endif
if (endindex >= fdims[d2])
{
xtend_size[d2] = (long long unsigned)(endindex+1);
need_to_extend++;
}
else
xtend_size[d2] = (long long unsigned)fdims[d2];
if (endindex >= dim->len)
{
dim->len = endindex+1;
dim->extended = NC_TRUE;
}
}
else
{
xtend_size[d2] = (long long unsigned)dim->len;
}
}
#ifdef USE_PARALLEL4
/* Check if anyone wants to extend */
if (extend_possible && h5->parallel && NC_COLLECTIVE == var->parallel_access)
{
/* Form consensus opinion among all processes about whether to perform
* collective I/O
*/
if(MPI_SUCCESS != MPI_Allreduce(MPI_IN_PLACE, &need_to_extend, 1, MPI_INT, MPI_BOR, h5->comm))
BAIL(NC_EMPI);
}
#endif /* USE_PARALLEL4 */
/* If we need to extend it, we also need a new file_spaceid
to reflect the new size of the space. */
if (need_to_extend)
{
LOG((4, "extending dataset"));
#ifdef USE_PARALLEL4
if (h5->parallel)
{
if(NC_COLLECTIVE != var->parallel_access)
BAIL(NC_ECANTEXTEND);
/* Reach consensus about dimension sizes to extend to */
if(MPI_SUCCESS != MPI_Allreduce(MPI_IN_PLACE, xtend_size, var->ndims, MPI_UNSIGNED_LONG_LONG, MPI_MAX, h5->comm))
BAIL(NC_EMPI);
}
#endif /* USE_PARALLEL4 */
/* Convert xtend_size back to hsize_t for use with H5Dset_extent */
for (d2 = 0; d2 < var->ndims; d2++)
fdims[d2] = (hsize_t)xtend_size[d2];
if (H5Dset_extent(var->hdf_datasetid, fdims) < 0)
BAIL(NC_EHDFERR);
if (file_spaceid > 0 && H5Sclose(file_spaceid) < 0)
BAIL2(NC_EHDFERR);
if ((file_spaceid = H5Dget_space(var->hdf_datasetid)) < 0)
BAIL(NC_EHDFERR);
if (H5Sselect_hyperslab(file_spaceid, H5S_SELECT_SET,
start, stride, count, NULL) < 0)
BAIL(NC_EHDFERR);
}
}
#ifndef HDF5_CONVERT
/* Do we need to convert the data? */
if (need_to_convert)
{
if ((retval = nc4_convert_type(data, bufr, mem_nc_type, var->type_info->hdr.id,
len, &range_error, var->fill_value,
(h5->cmode & NC_CLASSIC_MODEL), is_long, 0)))
BAIL(retval);
}
#endif
/* Write the data. At last! */
LOG((4, "about to H5Dwrite datasetid 0x%x mem_spaceid 0x%x "
"file_spaceid 0x%x", var->hdf_datasetid, mem_spaceid, file_spaceid));
if (H5Dwrite(var->hdf_datasetid, var->type_info->hdf_typeid,
mem_spaceid, file_spaceid, xfer_plistid, bufr) < 0)
BAIL(NC_EHDFERR);
/* Remember that we have written to this var so that Fill Value
* can't be set for it. */
if (!var->written_to)
var->written_to = NC_TRUE;
/* For strict netcdf-3 rules, ignore erange errors between UBYTE
* and BYTE types. */
if ((h5->cmode & NC_CLASSIC_MODEL) &&
(var->type_info->hdr.id == NC_UBYTE || var->type_info->hdr.id == NC_BYTE) &&
(mem_nc_type == NC_UBYTE || mem_nc_type == NC_BYTE) &&
range_error)
range_error = 0;
exit:
#ifdef HDF5_CONVERT
if (mem_typeid > 0 && H5Tclose(mem_typeid) < 0)
BAIL2(NC_EHDFERR);
#endif
if (file_spaceid > 0 && H5Sclose(file_spaceid) < 0)
BAIL2(NC_EHDFERR);
if (mem_spaceid > 0 && H5Sclose(mem_spaceid) < 0)
BAIL2(NC_EHDFERR);
if (xfer_plistid && (H5Pclose(xfer_plistid) < 0))
BAIL2(NC_EPARINIT);
#ifndef HDF5_CONVERT
if (need_to_convert && bufr) free(bufr);
#endif
/* If there was an error return it, otherwise return any potential
range error value. If none, return NC_NOERR as usual.*/
if (retval)
return retval;
if (range_error)
return NC_ERANGE;
return NC_NOERR;
}
/**
* @internal Read a strided array of data from a variable.
*
* @param nc Pointer to the file NC struct.
* @param ncid File ID.
* @param varid Variable ID.
* @param startp Array of start indices.
* @param countp Array of counts.
* @param stridep Array of strides.
* @param mem_nc_type The type of the data in memory. (Convert to this
* type from file type.)
* @param is_long True only if NC_LONG is the memory type.
* @param data The data to be written.
*
* @returns ::NC_NOERR No error.
* @returns ::NC_EBADID Bad ncid.
* @returns ::NC_ENOTVAR Var not found.
* @returns ::NC_EHDFERR HDF5 function returned error.
* @returns ::NC_EINVALCOORDS Incorrect start.
* @returns ::NC_EEDGE Incorrect start/count.
* @returns ::NC_ENOMEM Out of memory.
* @returns ::NC_EMPI MPI library error (parallel only)
* @returns ::NC_ECANTEXTEND Can't extend dimension for write.
* @returns ::NC_ERANGE Data conversion error.
* @author Ed Hartnett
*/
int
nc4_get_vars(NC *nc, int ncid, int varid, const size_t *startp,
const size_t *countp, const ptrdiff_t* stridep,
nc_type mem_nc_type, int is_long, void *data)
{
NC_GRP_INFO_T *grp;
NC_HDF5_FILE_INFO_T *h5;
NC_VAR_INFO_T *var;
NC_DIM_INFO_T *dim;
hid_t file_spaceid = 0, mem_spaceid = 0;
hid_t xfer_plistid = 0;
size_t file_type_size;
hsize_t *xtend_size = NULL, count[NC_MAX_VAR_DIMS];
hsize_t fdims[NC_MAX_VAR_DIMS], fmaxdims[NC_MAX_VAR_DIMS];
hsize_t start[NC_MAX_VAR_DIMS];
hsize_t stride[NC_MAX_VAR_DIMS];
char *name_to_use;
void *fillvalue = NULL;
int no_read = 0, provide_fill = 0;
int fill_value_size[NC_MAX_VAR_DIMS];
int scalar = 0, retval = NC_NOERR, range_error = 0, i, d2;
void *bufr = NULL;
#ifdef HDF5_CONVERT
hid_t mem_typeid = 0;
#endif
#ifndef HDF5_CONVERT
int need_to_convert = 0;
size_t len = 1;
#endif
/* Find our metadata for this file, group, and var. */
assert(nc);
if ((retval = nc4_find_g_var_nc(nc, ncid, varid, &grp, &var)))
return retval;
h5 = NC4_DATA(nc);
assert(grp && h5 && var && var->hdr.name);
LOG((3, "%s: var->hdr.name %s mem_nc_type %d is_long %d",
__func__, var->hdr.name, mem_nc_type, is_long));
/* Check some stuff about the type and the file. */
if ((retval = check_for_vara(&mem_nc_type, var, h5)))
return retval;
/* Convert from size_t and ptrdiff_t to hssize_t, and hsize_t. */
/* Also do sanity checks */
for (i = 0; i < var->ndims; i++)
{
start[i] = (startp == NULL ? 0 : startp[i]);
count[i] = (countp == NULL ? 1 : countp[i]);
stride[i] = (stridep == NULL ? 1 : stridep[i]);
/* if any of the count values are zero don't actually read. */
if (count[i] == 0)
no_read++;
/* if any of the stride values are non-positive, fail. */
if (stride[i] <= 0)
return NC_ESTRIDE;
}
/* Open this dataset if necessary, also checking for a weird case:
* a non-coordinate (and non-scalar) variable that has the same
* name as a dimension. */
if (var->hdf5_name && strlen(var->hdf5_name) >= strlen(NON_COORD_PREPEND) &&
strncmp(var->hdf5_name, NON_COORD_PREPEND, strlen(NON_COORD_PREPEND)) == 0 &&
var->ndims)
name_to_use = var->hdf5_name;
else
name_to_use = var->hdr.name;
if (!var->hdf_datasetid)
if ((var->hdf_datasetid = H5Dopen2(grp->hdf_grpid, name_to_use, H5P_DEFAULT)) < 0)
return NC_ENOTVAR;
/* Get file space of data. */
if ((file_spaceid = H5Dget_space(var->hdf_datasetid)) < 0)
BAIL(NC_EHDFERR);
/* Check to ensure the user selection is
* valid. H5Sget_simple_extent_dims gets the sizes of all the dims
* and put them in fdims. */
if (H5Sget_simple_extent_dims(file_spaceid, fdims, fmaxdims) < 0)
BAIL(NC_EHDFERR);
#ifdef LOGGING
log_dim_info(var, fdims, fmaxdims, start, count);
#endif
/* Check dimension bounds. Remember that unlimited dimensions can
* put data beyond their current length. */
for (d2 = 0; d2 < var->ndims; d2++) {
hsize_t endindex = start[d2] + (stride[d2]*(count[d2]-1)); /* last index read */
dim = var->dim[d2];
assert(dim && dim->hdr.id == var->dimids[d2]);
if(count[d2] == 0)
endindex = start[d2]; /* fixup for zero read count */
if (dim->unlimited)
{
size_t ulen;
/* We can't go beyond the largest current extent of
the unlimited dim. */
if ((retval = NC4_inq_dim(ncid, dim->hdr.id, NULL, &ulen)))
BAIL(retval);
/* Check for out of bound requests. */
#ifdef RELAX_COORD_BOUND
if (start[d2] > (hssize_t)ulen ||
(start[d2] == (hssize_t)ulen && count[d2] > 0))
#else
if (start[d2] >= (hssize_t)ulen && ulen > 0)
#endif
BAIL_QUIET(NC_EINVALCOORDS);
if (endindex >= ulen)
BAIL_QUIET(NC_EEDGE);
/* Things get a little tricky here. If we're getting
a GET request beyond the end of this var's
current length in an unlimited dimension, we'll
later need to return the fill value for the
variable. */
if (start[d2] >= (hssize_t)fdims[d2])
fill_value_size[d2] = count[d2];
else if (endindex >= fdims[d2])
fill_value_size[d2] = count[d2] - ((fdims[d2] - start[d2])/stride[d2]);
else
fill_value_size[d2] = 0;
count[d2] -= fill_value_size[d2];
if (fill_value_size[d2])
provide_fill++;
}
else
{
/* Check for out of bound requests. */
#ifdef RELAX_COORD_BOUND
if (start[d2] > (hssize_t)fdims[d2] ||
(start[d2] == (hssize_t)fdims[d2] && count[d2] > 0))
#else
if (start[d2] >= (hssize_t)fdims[d2])
#endif
BAIL_QUIET(NC_EINVALCOORDS);
if (endindex >= fdims[d2])
BAIL_QUIET(NC_EEDGE);
/* Set the fill value boundary */
fill_value_size[d2] = count[d2];
}
}
/* Later on, we will need to know the size of this type in the
* file. */
assert(var->type_info->size);
file_type_size = var->type_info->size;
if (!no_read)
{
/* Now you would think that no one would be crazy enough to write
a scalar dataspace with one of the array function calls, but you
would be wrong. So let's check to see if the dataset is
scalar. If it is, we won't try to set up a hyperslab. */
if (H5Sget_simple_extent_type(file_spaceid) == H5S_SCALAR)
{
if ((mem_spaceid = H5Screate(H5S_SCALAR)) < 0)
BAIL(NC_EHDFERR);
scalar++;
}
else
{
if (H5Sselect_hyperslab(file_spaceid, H5S_SELECT_SET,
start, stride, count, NULL) < 0)
BAIL(NC_EHDFERR);
/* Create a space for the memory, just big enough to hold the slab
we want. */
if ((mem_spaceid = H5Screate_simple(var->ndims, count, NULL)) < 0)
BAIL(NC_EHDFERR);
}
/* Fix bug when reading HDF5 files with variable of type
* fixed-length string. We need to make it look like a
* variable-length string, because that's all netCDF-4 data
* model supports, lacking anonymous dimensions. So
* variable-length strings are in allocated memory that user has
* to free, which we allocate here. */
if(var->type_info->nc_type_class == NC_STRING &&
H5Tget_size(var->type_info->hdf_typeid) > 1 &&
!H5Tis_variable_str(var->type_info->hdf_typeid)) {
hsize_t fstring_len;
if ((fstring_len = H5Tget_size(var->type_info->hdf_typeid)) == 0)
BAIL(NC_EHDFERR);
if (!(*(char **)data = malloc(1 + fstring_len)))
BAIL(NC_ENOMEM);
bufr = *(char **)data;
}
#ifndef HDF5_CONVERT
/* Are we going to convert any data? (No converting of compound or
* opaque types.) */
if ((mem_nc_type != var->type_info->hdr.id || (var->type_info->hdr.id == NC_INT && is_long)) &&
mem_nc_type != NC_COMPOUND && mem_nc_type != NC_OPAQUE)
{
/* We must convert - allocate a buffer. */
need_to_convert++;
if (var->ndims)
for (d2 = 0; d2 < var->ndims; d2++)
len *= countp[d2];
LOG((4, "converting data for var %s type=%d len=%d", var->hdr.name,
var->type_info->hdr.id, len));
/* If we're reading, we need bufr to have enough memory to store
* the data in the file. If we're writing, we need bufr to be
* big enough to hold all the data in the file's type. */
if(len > 0)
if (!(bufr = malloc(len * file_type_size)))
BAIL(NC_ENOMEM);
}
else
#endif /* ifndef HDF5_CONVERT */
if(!bufr)
bufr = data;
/* Get the HDF type of the data in memory. */
#ifdef HDF5_CONVERT
if ((retval = nc4_get_hdf_typeid(h5, mem_nc_type, &mem_typeid,
var->type_info->endianness)))
BAIL(retval);
#endif
/* Create the data transfer property list. */
if ((xfer_plistid = H5Pcreate(H5P_DATASET_XFER)) < 0)
BAIL(NC_EHDFERR);
#ifdef HDF5_CONVERT
/* Apply the callback function which will detect range
* errors. Which one to call depends on the length of the
* destination buffer type. */
if (H5Pset_type_conv_cb(xfer_plistid, except_func, &range_error) < 0)
BAIL(NC_EHDFERR);
#endif
#ifdef USE_PARALLEL4
/* Set up parallel I/O, if needed. */
if ((retval = set_par_access(h5, var, xfer_plistid)))
BAIL(retval);
#endif
/* Read this hyperslab into memory. */
LOG((5, "About to H5Dread some data..."));
if (H5Dread(var->hdf_datasetid, var->type_info->native_hdf_typeid,
mem_spaceid, file_spaceid, xfer_plistid, bufr) < 0)
BAIL(NC_EHDFERR);
#ifndef HDF5_CONVERT
/* Eventually the block below will go away. Right now it's
needed to support conversions between int/float, and range
checking converted data in the netcdf way. These features are
being added to HDF5 at the HDF5 World Hall of Coding right
now, by a staff of thousands of programming gnomes. */
if (need_to_convert)
{
if ((retval = nc4_convert_type(bufr, data, var->type_info->hdr.id, mem_nc_type,
len, &range_error, var->fill_value,
(h5->cmode & NC_CLASSIC_MODEL), 0, is_long)))
BAIL(retval);
/* For strict netcdf-3 rules, ignore erange errors between UBYTE
* and BYTE types. */
if ((h5->cmode & NC_CLASSIC_MODEL) &&
(var->type_info->hdr.id == NC_UBYTE || var->type_info->hdr.id == NC_BYTE) &&
(mem_nc_type == NC_UBYTE || mem_nc_type == NC_BYTE) &&
range_error)
range_error = 0;
}
#endif
/* For strict netcdf-3 rules, ignore erange errors between UBYTE
* and BYTE types. */
if ((h5->cmode & NC_CLASSIC_MODEL) &&
(var->type_info->hdr.id == NC_UBYTE || var->type_info->hdr.id == NC_BYTE) &&
(mem_nc_type == NC_UBYTE || mem_nc_type == NC_BYTE) &&
range_error)
range_error = 0;
} /* endif ! no_read */
else {
#ifdef USE_PARALLEL4 /* Start block contributed by HDF group. */
/* For collective IO read, some processes may not have any element for reading.
Collective requires all processes to participate, so we use H5Sselect_none
for these processes. */
if(var->parallel_access == NC_COLLECTIVE) {
/* Create the data transfer property list. */
if ((xfer_plistid = H5Pcreate(H5P_DATASET_XFER)) < 0)
BAIL(NC_EHDFERR);
if ((retval = set_par_access(h5, var, xfer_plistid)))
BAIL(retval);
if (H5Sselect_none(file_spaceid)<0)
BAIL(NC_EHDFERR);
/* Since no element will be selected, we just get the memory space the same as the file space.
*/
if((mem_spaceid = H5Dget_space(var->hdf_datasetid))<0)
BAIL(NC_EHDFERR);
if (H5Sselect_none(mem_spaceid)<0)
BAIL(NC_EHDFERR);
/* Read this hyperslab into memory. */
LOG((5, "About to H5Dread some data..."));
if (H5Dread(var->hdf_datasetid, var->type_info->native_hdf_typeid,
mem_spaceid, file_spaceid, xfer_plistid, bufr) < 0)
BAIL(NC_EHDFERR);
}
#endif /* End ifdef USE_PARALLEL4 */
}
/* Now we need to fake up any further data that was asked for,
using the fill values instead. First skip past the data we
just read, if any. */
if (!scalar && provide_fill)
{
void *filldata;
size_t real_data_size = 0;
size_t fill_len;
/* Skip past the real data we've already read. */
if (!no_read)
for (real_data_size = file_type_size, d2 = 0; d2 < var->ndims; d2++)
real_data_size *= (count[d2] - start[d2]);
/* Get the fill value from the HDF5 variable. Memory will be
* allocated. */
if (get_fill_value(h5, var, &fillvalue) < 0)
BAIL(NC_EHDFERR);
/* How many fill values do we need? */
for (fill_len = 1, d2 = 0; d2 < var->ndims; d2++)
fill_len *= (fill_value_size[d2] ? fill_value_size[d2] : 1);
/* Copy the fill value into the rest of the data buffer. */
filldata = (char *)data + real_data_size;
for (i = 0; i < fill_len; i++)
{
if (var->type_info->nc_type_class == NC_STRING)
{
if (*(char **)fillvalue)
{
if (!(*(char **)filldata = strdup(*(char **)fillvalue)))
BAIL(NC_ENOMEM);
}
else
*(char **)filldata = NULL;
}
else if(var->type_info->nc_type_class == NC_VLEN) {
if(fillvalue) {
memcpy(filldata,fillvalue,file_type_size);
} else {
*(char **)filldata = NULL;
}
} else
memcpy(filldata, fillvalue, file_type_size);
filldata = (char *)filldata + file_type_size;
}
}
exit:
#ifdef HDF5_CONVERT
if (mem_typeid > 0 && H5Tclose(mem_typeid) < 0)
BAIL2(NC_EHDFERR);
#endif
if (file_spaceid > 0)
{
if (H5Sclose(file_spaceid) < 0)
BAIL2(NC_EHDFERR);
}
if (mem_spaceid > 0)
{
if (H5Sclose(mem_spaceid) < 0)
BAIL2(NC_EHDFERR);
}
if (xfer_plistid > 0)
{
if (H5Pclose(xfer_plistid) < 0)
BAIL2(NC_EHDFERR);
}
#ifndef HDF5_CONVERT
if (need_to_convert && bufr != NULL)
free(bufr);
#endif
if (xtend_size)
free(xtend_size);
if (fillvalue)
{
if (var->type_info->nc_type_class == NC_VLEN)
nc_free_vlen((nc_vlen_t *)fillvalue);
else if (var->type_info->nc_type_class == NC_STRING && *(char **)fillvalue)
free(*(char **)fillvalue);
free(fillvalue);
}
/* If there was an error return it, otherwise return any potential
range error value. If none, return NC_NOERR as usual.*/
if (retval)
return retval;
if (range_error)
return NC_ERANGE;
return NC_NOERR;
}

View File

@ -49,8 +49,8 @@ NC4_inq_varid,
NC4_rename_var,
NC4_get_vara,
NC4_put_vara,
NCDEFAULT_get_vars,
NCDEFAULT_put_vars,
NC4_get_vars,
NC4_put_vars,
NCDEFAULT_get_varm,
NCDEFAULT_put_varm,

View File

@ -1708,3 +1708,87 @@ NC4_get_vara(int ncid, int varid, const size_t *startp,
return nc4_get_vara(nc, ncid, varid, startp, countp, memtype,
0, (void *)ip);
}
/* Provide a temporary hook
to choose old NCDEFAULT methods vs new versions
of get/put vars
*/
/* Temporary flag to choose default vs not put/get vars */
static int defaultvars = 0;
void
nc4_set_default_vars(int tf)
{
defaultvars = (tf ? 1 : 0);
}
/**
* @internal Write an array of data to a variable. This is called by
* nc_put_vars() and other nc_put_vars_* functions, for netCDF-4
* files.
*
* @param ncid File ID.
* @param varid Variable ID.
* @param startp Array of start indices.
* @param countp Array of counts.
* @param stridep Array of strides.
* @param op pointer that gets the data.
* @param memtype The type of these data in memory.
*
* @returns ::NC_NOERR for success
* @author Dennis Heimbigner
*/
int
NC4_put_vars(int ncid, int varid, const size_t *startp,
const size_t *countp, const ptrdiff_t* stridep,
const void *op, int memtype)
{
NC *nc;
if(defaultvars)
return NCDEFAULT_put_vars(ncid,varid,startp,countp,stridep,op,memtype);
if (!(nc = nc4_find_nc_file(ncid, NULL)))
return NC_EBADID;
return nc4_put_vars(nc, ncid, varid, startp, countp, stridep, memtype, 0, (void *)op);
}
/**
* Read an array of values. This is called by nc_get_vars() for
* netCDF-4 files, as well as all the other nc_get_vars_*
* functions.
*
* @param ncid File ID.
* @param varid Variable ID.
* @param startp Array of start indices.
* @param countp Array of counts.
* @param stridep Array of strides.
* @param ip pointer that gets the data.
* @param memtype The type of these data after it is read into memory.
* @returns ::NC_NOERR for success
* @author Dennis Heimbigner
*/
int
NC4_get_vars(int ncid, int varid, const size_t *startp,
const size_t *countp, const ptrdiff_t *stridep,
void *ip, int memtype)
{
NC *nc;
NC_HDF5_FILE_INFO_T* h5;
if(defaultvars)
return NCDEFAULT_get_vars(ncid,varid,startp,countp,stridep,ip,memtype);
LOG((2, "%s: ncid 0x%x varid %d memtype %d", __func__, ncid, varid,
memtype));
if (!(nc = nc4_find_nc_file(ncid, &h5)))
return NC_EBADID;
/* Get the data. */
return nc4_get_vars(nc, ncid, varid, startp, countp, stridep, memtype,
0, (void *)ip);
}

View File

@ -1,8 +1,10 @@
# Test c output
T=tst_diskless
T=t
#ARGS=t
SRC=test_put.c test_get.c test_read.c test_write.c util.c error.c
#CMD=valgrind --leak-check=full
#CMD=export NETCDF_LOG_LEVEL=5 ;gdb --args
CMD=gdb --args

View File

@ -1,10 +1,10 @@
# Test c output
T=t_dap3a
T=tst_varsperf
#SRC=hdf5plugins/H5Zmisc.c
#CMD=valgrind --leak-check=full
CMD=gdb --args
#CMD=gdb --args
#FILTER=H5Zmisc
#FILTEROBJ=hdf5plugins/${FILTER}.o
@ -31,7 +31,7 @@ LLP=/usr/local/lib:${LD_LIBRARY_PATH}
all:: cmp
export LD_LIBRARY_PATH=${LLP}; export CFLAGS; export LDFLAGS; \
${CMD} ./t
${CMD} ./t ${ARGS}
cmp::
export LD_LIBRARY_PATH=${LLP}; export CFLAGS; export LDFLAGS; \

View File

@ -30,7 +30,7 @@ t_type cdm_sea_soundings tst_camrun tst_vl tst_atts1 tst_atts2 \
tst_vars2 tst_files5 tst_files6 tst_sync tst_h_scalar tst_rename \
tst_rename2 tst_h5_endians tst_atts_string_rewrite \
tst_hdf5_file_compat tst_fill_attr_vanish tst_rehash tst_filterparser \
tst_bug324 tst_types tst_atts3 tst_put_vars tst_elatefill
tst_bug324 tst_types tst_atts3 tst_put_vars tst_elatefill tst_varsperf
# Temporary I hope
if !ISCYGWIN
@ -150,7 +150,7 @@ tst_interops2.h4 tst_h5_endians.nc tst_h4_lendian.h4 test.nc \
tst_atts_string_rewrite.nc tst_empty_vlen_unlim.nc \
tst_empty_vlen_lim.nc tst_parallel4_simplerw_coll.nc \
tst_fill_attr_vanish.nc tst_rehash.nc testszip.nc test.h5 \
szip_dump.cdl perftest.txt bigmeta.nc
szip_dump.cdl perftest.txt bigmeta.nc bigvars.nc
DISTCLEANFILES = findplugin.sh

nc_test4/tst_varsperf.c (new file, 149 lines)
View File

@ -0,0 +1,149 @@
/*
Create a netcdf-4 file with
a large variable for the purpose
of doing performance test on the
new NC4_get/put_vars functions.
WARNING: do not attempt to run this
under windows because of the use
of gettimeofday().
*/
#include "config.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <sys/time.h>
#include <assert.h>
#include "netcdf.h"
#define FILE "bigvars.nc"
#define VAR "bigvar"
#define NDIMS 2
#define DIM0 "d0"
#define DIM1 "d1"
#define DIMSIZE0 1024
#define DIMSIZE1 1024
#define TOTALSIZE (DIMSIZE0*DIMSIZE1)
#define CHECK(expr) assert((expr) == NC_NOERR)
static int data[TOTALSIZE];
static int read[TOTALSIZE];
extern void nc4_set_default_vars(int);
void
buildfile(void)
{
int ncid, varid;
int dimids[NDIMS];
int* p;
int index;
CHECK(nc_create(FILE, NC_NETCDF4, &ncid));
CHECK(nc_def_dim(ncid,DIM0,(size_t)DIMSIZE0,&dimids[0]));
CHECK(nc_def_dim(ncid,DIM1,(size_t)DIMSIZE1,&dimids[1]));
CHECK(nc_def_var(ncid,VAR,NC_INT,NDIMS,dimids,&varid));
CHECK(nc_enddef(ncid));
for(p=data,index=0;index<TOTALSIZE;index++)
*p++ = index;
CHECK(nc_put_var_int(ncid,varid,data));
CHECK(nc_close(ncid));
}
long long
readfile(void)
{
int ncid, varid;
int ndims, i;
int dimids[NDIMS];
size_t dimsizes[NDIMS];
int vardims[NDIMS];
size_t start[NDIMS];
size_t count[NDIMS];
ptrdiff_t stride[NDIMS];
struct timeval starttime, endtime;
long long delta;
long long startt, endt;
memset(&starttime,0,sizeof(starttime));
memset(&endtime,0,sizeof(endtime));
/* re-open to read */
CHECK(nc_open(FILE, NC_NETCDF4, &ncid));
CHECK(nc_inq_dimid(ncid,DIM0,&dimids[0]));
CHECK(nc_inq_dimid(ncid,DIM1,&dimids[1]));
CHECK(nc_inq_dim(ncid,dimids[0],NULL,&dimsizes[0]));
CHECK(nc_inq_dim(ncid,dimids[1],NULL,&dimsizes[1]));
assert(dimsizes[0] == DIMSIZE0);
assert(dimsizes[1] == DIMSIZE1);
CHECK(nc_inq_varid(ncid,VAR,&varid));
CHECK(nc_inq_varndims(ncid,varid,&ndims));
assert(ndims == NDIMS);
CHECK(nc_inq_vardimid(ncid,varid,vardims));
for(i=0;i<NDIMS;i++)
assert(vardims[i] == dimids[i]);
/* Do the timed read */
for(i=0;i<NDIMS;i++) {
start[i] = 0;
count[i] = dimsizes[i]/2;
stride[i] = 2;
}
memset(read,0,sizeof(read));
gettimeofday(&starttime,NULL);
CHECK(nc_get_vars(ncid,varid,start,count,stride,read));
gettimeofday(&endtime,NULL);
CHECK(nc_close(ncid));
/* Verify read -- Note: NDIMS dependent */
{
int d0, d1;
i = 0;
for(d0=0;d0<DIMSIZE0;d0+=stride[0]) {
for(d1=0;d1<DIMSIZE1;d1+=stride[1]) {
size_t dataindex = (d0 * DIMSIZE0) + d1;
size_t readindex = i;
assert(data[dataindex] == read[readindex]);
i++;
}
}
}
/* Compute the time delta */
startt = (1000000*starttime.tv_sec) + starttime.tv_usec;
endt = (1000000*endtime.tv_sec) + endtime.tv_usec;
delta = endt - startt;
return delta;
}
int
main(int argc, char **argv)
{
long long defaultdelta, nc4delta, factor;
buildfile();
nc4_set_default_vars(1);
defaultdelta = readfile();
nc4_set_default_vars(0);
nc4delta = readfile();
/* Print results to the millisec */
factor = defaultdelta / nc4delta;
printf("NCDEFAULT time=%lld NC4 time=%lld Speedup=%lld\n",
defaultdelta, nc4delta, factor);
return 0;
}

View File

@ -449,7 +449,7 @@ processeconstrefsR(Datalist* data)
{
NCConstant* con = NULL;
int i;
for(i=0,con=data->data;i<data->alloc;i++,con++) {
for(i=0,con=data->data;i<data->length;i++,con++) {
if(con->nctype == NC_COMPOUND) {
/* Iterate over the sublists */
processeconstrefsR(con->value.compoundv);
@ -467,6 +467,7 @@ fixeconstref(NCConstant* con)
Symbol* refsym = con->value.enumv;
List* grpmatches;
/* Locate all possible matching enum constant definitions */
List* candidates = findecmatches(refsym->name);
if(candidates == NULL) {