netcdf-c/libhdf5/hdf5file.c

745 lines
21 KiB
C
Raw Normal View History

2018-02-08 21:20:58 +08:00
/* Copyright 2003-2018, University Corporation for Atmospheric
* Research. See COPYRIGHT file for copying and redistribution
* conditions. */
2017-12-05 03:21:14 +08:00
/**
* @file
* @internal The netCDF-4 file functions.
*
* This file is part of netcdf-4, a netCDF-like interface for HDF5, or
* a HDF5 backend for netCDF, depending on your point of view.
*
* @author Ed Hartnett
*/
2018-02-08 21:20:58 +08:00
#include "config.h"
#include "hdf5internal.h"
Revert/Improve nc_create + NC_DISKLESS behavior re: https://github.com/Unidata/netcdf-c/issues/1154 Inadvertently, the behavior of NC_DISKLESS with nc_create() was changed in release 4.6.1. Previously, the NC_WRITE flag needed to be explicitly used with NC_DISKLESS in order to cause the created file to be persisted to disk. Additional analysis indicated that the current NC_DISKLESS implementation was seriously flawed. This PR attempts to clean up and regularize the situation with respect to NC_DISKLESS control. One important aspect of diskless operation is that there are two different notions of write. 1. The file is read-write vs read-only when using the netcdf API. 2. The file is persisted or not to disk at nc_close(). Previously, these two were conflated. The rules now are as follows. 1. NC_DISKLESS + NC_WRITE means that the file is read/write using the netcdf API 2. NC_DISKLESS + NC_PERSIST means that the file is persisted to a disk file at nc_close. 3. NC_DISKLESS + NC_PERSIST + NC_WRITE means both 1 and 2. The NC_PERSIST flag is new and takes over the obsolete NC_MPIPOSIX flag. NC_MPIPOSIX is still defined, but is now an alias for the NC_MPIIO flag. It is also now the case that for netcdf-4, NC_DISKLESS is independent of NC_INMEMORY and in fact it is an error to specify both flags simultaneously. Finally, the MMAP code was fixed to use NC_PERSIST as well. Also marked MMAP as deprecated. Also added a test case to test various combinations of NC_DISKLESS, NC_PERSIST, and NC_WRITE. This PR affects a number of files and especially test cases that used NC_DISKLESS. Misc. Unrelated fixes 1. fixed some warnings in ncdump/dumplib.c
2018-10-11 03:32:17 +08:00
#include "ncrc.h"
#include "ncauth.h"
2010-06-03 21:24:43 +08:00
/* Implemented in nc4memcb.c. */
extern int NC4_extract_file_image(NC_FILE_INFO_T* h5, int abort);

/** @internal When we have open objects at file close, should
    we log them or print to stdout. Default is to log. */
#define LOGOPEN 1

/* Forward declarations of file-local functions that are used before
 * their definitions further down in this file. Each function is
 * declared exactly once. */
static int NC4_enddef(int ncid);
static void dumpopenobjects(NC_FILE_INFO_T* h5);
/**
 * @internal Walk a group and, recursively, all of its child groups to
 * decide whether dimension IDs have to be preserved explicitly. That
 * is the case as soon as one coordinate (dimension scale) variable
 * with at least one dimension is found that
 *  - has a first dimid lower than that of the previous coordinate
 *    variable in the same group (coordinate variables were written in
 *    a different order than their dimensions were defined), or
 *  - is multidimensional, or
 *  - is flagged as new or as having become a coordinate variable
 *    (defined after an enddef/redef cycle).
 *
 * @param grp Pointer to group info struct.
 * @param bad_coord_orderp Set to NC_TRUE when one of the situations
 * above is found. It is never reset here, so the caller must
 * initialize it.
 *
 * @returns NC_NOERR No error.
 * @returns Any non-zero code produced by the recursive call on a
 * child group is passed through unchanged.
 * @author Ed Hartnett
 */
static int
detect_preserve_dimids(NC_GRP_INFO_T *grp, nc_bool_t *bad_coord_orderp)
{
    int prev_dimid = -1; /* First dimid of the last coord var accepted. */
    int status;
    int n;

    /* First look at the variables that live directly in this group. */
    for (n = 0; n < ncindexsize(grp->vars); n++)
    {
        NC_VAR_INFO_T *v = (NC_VAR_INFO_T *)ncindexith(grp->vars, n);
        NC_HDF5_VAR_INFO_T *hv;
        int flagged = 0;

        if (v == NULL)
            continue;

        /* Scalars and variables that are not dimension scales are of
         * no interest here. */
        hv = (NC_HDF5_VAR_INFO_T *)v->format_var_info;
        if (!hv->dimscale || !v->ndims)
            continue;

        if (v->dimids[0] < prev_dimid)
        {
            /* Coord vars were written in a different order than their
             * dimensions were defined; on reopen the dimids would be
             * renumbered to follow the coord var order. */
            LOG((5, "%s: %s is out of order coord var", __func__, v->hdr.name));
            flagged = 1;
        }
        else if (v->ndims > 1)
        {
            /* Multidimensional coordinate variables also require the
             * dimension IDs to be preserved across a reopen. */
            LOG((5, "%s: %s is multidimensional coord var", __func__, v->hdr.name));
            flagged = 1;
        }
        else if (v->is_new_var || v->became_coord_var)
        {
            /* A dimension was defined, define mode was left and
             * re-entered, and only then its coordinate variable was
             * defined: the dimensions will be out of order. */
            LOG((5, "%s: coord var defined after enddef/redef", __func__));
            flagged = 1;
        }

        if (flagged)
        {
            *bad_coord_orderp = NC_TRUE;
            return NC_NOERR;
        }

        prev_dimid = v->dimids[0];
    }

    /* Nothing found here; apply the same test to every child group. */
    for (n = 0; n < ncindexsize(grp->children); n++)
    {
        NC_GRP_INFO_T *kid = (NC_GRP_INFO_T *)ncindexith(grp->children, n);

        if (kid == NULL)
            continue;

        status = detect_preserve_dimids(kid, bad_coord_orderp);
        if (status)
            return status;
    }

    return NC_NOERR;
}
2017-12-05 03:21:14 +08:00
/**
2018-08-07 00:49:31 +08:00
* @internal This function will write all changed metadata and flush
* HDF5 file to disk.
2017-12-05 03:21:14 +08:00
*
* @param h5 Pointer to HDF5 file info struct.
*
* @return ::NC_NOERR No error.
2018-08-07 00:49:31 +08:00
* @return ::NC_EINDEFINE Classic model file in define mode.
* @return ::NC_EHDFERR HDF5 error.
2017-12-05 03:21:14 +08:00
* @author Ed Hartnett
*/
2017-12-05 03:21:14 +08:00
static int
sync_netcdf4_file(NC_FILE_INFO_T *h5)
2017-12-05 03:21:14 +08:00
{
NC_HDF5_FILE_INFO_T *hdf5_info;
int retval;
assert(h5 && h5->format_file_info);
LOG((3, "%s", __func__));
/* If we're in define mode, that's an error, for strict nc3 rules,
* otherwise, end define mode. */
if (h5->flags & NC_INDEF)
{
if (h5->cmode & NC_CLASSIC_MODEL)
return NC_EINDEFINE;
/* Turn define mode off. */
h5->flags ^= NC_INDEF;
/* Redef mode needs to be tracked separately for nc_abort. */
h5->redef = NC_FALSE;
}
2017-12-05 03:21:14 +08:00
#ifdef LOGGING
/* This will print out the names, types, lens, etc of the vars and
atts in the file, if the logging level is 2 or greater. */
log_metadata_nc(h5);
2017-12-05 03:21:14 +08:00
#endif
/* Write any metadata that has changed. */
if (!h5->no_write)
{
nc_bool_t bad_coord_order = NC_FALSE;
/* Write any user-defined types. */
if ((retval = nc4_rec_write_groups_types(h5->root_grp)))
return retval;
2018-08-07 00:49:31 +08:00
/* Check to see if the coordinate order is messed up. If
* detected, propagate to all groups to consistently store
* dimids. */
if ((retval = detect_preserve_dimids(h5->root_grp, &bad_coord_order)))
return retval;
2018-08-07 00:49:31 +08:00
/* Write all the metadata. */
if ((retval = nc4_rec_write_metadata(h5->root_grp, bad_coord_order)))
return retval;
There was a request to extend the provenance information stored in the _NCProperties attribute to allow two things: 1. capture of additional library dependencies (over and above hdf5) 2. Recognition of non-netcdf libraries that create netcdf-4 format files. To this end, the _NCProperties format has been extended to be and arbitrary set of key=value pairs separated by commas. This new format has version = 2, and uses commas as the pair separator. Thus the general form is: _NCProperties = "version=2,key1=value,key2=value2..." ; This new version is accompanied by a new ./configure option of the form --with-ncproperties="key1=value1,key2=value2..." that specifies pairs to add to the _NCProperties attribute for all files created with that netcdf library. At this point, what is missing is some programmatic way to specify either all the pairs or additional pairs to the _NCProperties attribute. Not sure of the best way to do this. Builders using non-netcdf libraries can specify whatever they want in the key value pairs (as long as the version=2 is specified first). By convention, the primary library is expected to be the the first pair after the leading version=2 pair, but this is convention only and is neither required nor enforced. Related changes: 1. Fixed the tests that check _NCProperties to properly operate with version=2. 2. When reading a version 1 _NCProperties attribute, convert it to look like a version 2 attribute. 2. Added some version 2 tests to ncdump/tst_fileinfo.c and ncdump/tst_fileinfo.sh Misc Changes: 1. Fix minor problem in ncdap_test/testurl.sh where a parameter to buildurl needed to be quoted. 2. Minor fix to ncgen to swap switches -H and -h to be consistent with other utilities. 3. Document the -M flag in nccopy usage() and the nccopy man page. 4. Modify a test case to use the nccopy -M flag.
2018-08-26 11:44:41 +08:00
/* Write out provenance*/
if((retval = NC4_write_provenance(h5)))
return retval;
}
/* Tell HDF5 to flush all changes to the file. */
hdf5_info = (NC_HDF5_FILE_INFO_T *)h5->format_file_info;
if (H5Fflush(hdf5_info->hdfid, H5F_SCOPE_GLOBAL) < 0)
return NC_EHDFERR;
2014-07-10 06:45:13 +08:00
return NC_NOERR;
2017-12-05 03:21:14 +08:00
}
2014-07-10 06:45:13 +08:00
2017-12-05 03:21:14 +08:00
/**
 * @internal Release the metadata memory held for a file and close
 * the HDF5 file. For an inmemory file the final memory image is
 * extracted; it is either handed back through @p memio or released
 * here.
 *
 * @param h5 Pointer to HDF5 file info struct.
 * @param abort True if this is an abort.
 * @param memio If not NULL and this is not an abort, receives the
 * final memory image of an inmemory file. The internal pointer is
 * then cleared so that the block is not freed by this function.
 *
 * @return ::NC_NOERR No error.
 * @return ::NC_EHDFERR HDF5 could not close the file.
 * @return Any error from nc4_nc4f_list_del() is passed through.
 * @author Ed Hartnett, Dennis Heimbigner
 */
int
nc4_close_netcdf4_file(NC_FILE_INFO_T *h5, int abort, NC_memio *memio)
{
    NC_HDF5_FILE_INFO_T *hdf5;
    int status;

    assert(h5 && h5->root_grp && h5->format_file_info);
    LOG((3, "%s: h5->path %s abort %d", __func__, h5->controller->path, abort));

    /* The HDF5-specific part of the file info. */
    hdf5 = (NC_HDF5_FILE_INFO_T *)h5->format_file_info;

#ifdef USE_PARALLEL4
    /* A file opened in parallel owns an MPI communicator and info
     * object; give them back. */
    if (h5->parallel)
    {
        if (h5->comm != MPI_COMM_NULL)
            MPI_Comm_free(&h5->comm);
        if (h5->info != MPI_INFO_NULL)
            MPI_Info_free(&h5->info);
    }
#endif

    /* Drop the provenance data, which holds info from the fileinfo
     * hidden attribute. */
    NC4_clear_provenance(&h5->provenance);

#ifdef ENABLE_BYTERANGE
    ncurifree(hdf5->uri);
#if defined(ENABLE_HDF5_ROS3) || defined(ENABLE_S3_SDK)
    /* Free the http info */
    NC_authfree(hdf5->auth);
#endif
#endif

    /* The HDF5 file may never have been opened: this function is also
     * called by NC_create() when a file opening is aborted. */
    if (hdf5->hdfid > 0)
    {
        if (H5Fclose(hdf5->hdfid) < 0)
        {
            /* NOTE(review): this path returns before the remaining
             * cleanup below has run. */
            dumpopenobjects(h5);
            return NC_EHDFERR;
        }
    }

    /* For an inmemory file, either hand the final memory block to the
     * caller or release it. */
    if (h5->mem.inmemory)
    {
        /* Pull out the final memory. */
        (void)NC4_extract_file_image(h5, abort);

        if (!abort && memio != NULL)
        {
            *memio = h5->mem.memio;      /* capture it */
            h5->mem.memio.memory = NULL; /* avoid duplicate free */
        }

        /* Reclaim whatever is left, unless the block is locked: if
         * the original block of memory is not resizeable, then it
         * belongs to the caller and we should not free it. Freeing a
         * NULL pointer is a no-op, so no separate NULL test is
         * needed. */
        if (!h5->mem.locked)
            free(h5->mem.memio.memory);

        h5->mem.memio.memory = NULL;
        h5->mem.memio.size = 0;
        NC4_image_finalize(h5->mem.udata);
    }

    /* Free the HDF5-specific info. */
    free(h5->format_file_info);

    /* Free the NC_FILE_INFO_T struct. */
    status = nc4_nc4f_list_del(h5);
    if (status)
        return status;

    return NC_NOERR;
}
/**
 * @internal Close an open HDF5 file in three steps: sync it (unless
 * this is an abort or the file is read-only), close every open HDF5
 * object below the root group, and finally release the file-level
 * resources via nc4_close_netcdf4_file().
 *
 * @param h5 Pointer to HDF5 file info struct.
 * @param abort True if this is an abort.
 * @param memio the place to return a core image if not NULL
 *
 * @return ::NC_NOERR No error.
 * @return ::NC_EHDFERR HDF5 could not close the file.
 * @return Any error from the sync or object-release steps is passed
 * through.
 * @author Ed Hartnett
 */
int
nc4_close_hdf5_file(NC_FILE_INFO_T *h5, int abort, NC_memio *memio)
{
    int status;

    assert(h5 && h5->root_grp && h5->format_file_info);
    LOG((3, "%s: h5->path %s abort %d", __func__, h5->controller->path, abort));

    /* According to the docs, define mode always ends on close.
     * Clearing the bit has no effect when it is not set. */
    h5->flags &= ~NC_INDEF;

    /* An aborted or read-only file is not synced. */
    if (!(h5->no_write || abort))
    {
        status = sync_netcdf4_file(h5);
        if (status)
            return status;
    }

    /* Close all open HDF5 objects within the file. */
    status = nc4_rec_grp_HDF5_del(h5->root_grp);
    if (status)
        return status;

    /* With all HDF5 objects released, drop the internal lists and
     * metadata associated with this file. */
    status = nc4_close_netcdf4_file(h5, abort, memio);
    if (status)
        return status;

    return NC_NOERR;
}
2018-09-07 03:57:39 +08:00
/**
* @internal Output a list of still-open objects in the HDF5
* file. This is only called if the file fails to close cleanly.
*
* @param h5 Pointer to file info.
*
* @author Dennis Heimbigner
*/
static void
2018-09-07 03:57:39 +08:00
dumpopenobjects(NC_FILE_INFO_T* h5)
{
NC_HDF5_FILE_INFO_T *hdf5_info;
int nobjs;
assert(h5 && h5->format_file_info);
hdf5_info = (NC_HDF5_FILE_INFO_T *)h5->format_file_info;
if(hdf5_info->hdfid <= 0)
return; /* File was never opened */
nobjs = H5Fget_obj_count(hdf5_info->hdfid, H5F_OBJ_ALL);
/* Apparently we can get an error even when nobjs == 0 */
if(nobjs < 0) {
return;
} else if(nobjs > 0) {
char msg[1024];
int logit = 0;
/* If the close doesn't work, probably there are still some HDF5
* objects open, which means there's a bug in the library. So
* print out some info on to help the poor programmer figure it
* out. */
snprintf(msg,sizeof(msg),"There are %d HDF5 objects open!", nobjs);
2018-09-07 03:57:39 +08:00
#ifdef LOGGING
#ifdef LOGOPEN
LOG((0, msg));
logit = 1;
2018-09-07 03:57:39 +08:00
#endif
#else
fprintf(stdout,"%s\n",msg);
logit = 0;
2018-09-07 03:57:39 +08:00
#endif
reportopenobjects(logit,hdf5_info->hdfid);
fflush(stderr);
}
2018-09-07 03:57:39 +08:00
return;
2018-09-07 03:57:39 +08:00
}
2017-12-05 03:21:14 +08:00
/**
* @internal Unfortunately HDF only allows specification of fill value
* only when a dataset is created. Whereas in netcdf, you first create
* the variable and then (optionally) specify the fill value. To
* accomplish this in HDF5 I have to delete the dataset, and recreate
* it, with the fill value specified.
2017-12-05 03:21:14 +08:00
*
* @param ncid File and group ID.
* @param fillmode File mode.
* @param old_modep Pointer that gets old mode. Ignored if NULL.
*
* @return ::NC_NOERR No error.
* @author Ed Hartnett
*/
2014-07-10 06:45:13 +08:00
int
2010-06-03 21:24:43 +08:00
NC4_set_fill(int ncid, int fillmode, int *old_modep)
{
NC_FILE_INFO_T *nc4_info;
int retval;
2014-07-10 06:45:13 +08:00
LOG((2, "%s: ncid 0x%x fillmode %d", __func__, ncid, fillmode));
2010-06-03 21:24:43 +08:00
/* Get pointer to file info. */
if ((retval = nc4_find_grp_h5(ncid, NULL, &nc4_info)))
return retval;
assert(nc4_info);
2010-06-03 21:24:43 +08:00
/* Trying to set fill on a read-only file? You sicken me! */
if (nc4_info->no_write)
return NC_EPERM;
2010-06-03 21:24:43 +08:00
/* Did you pass me some weird fillmode? */
if (fillmode != NC_FILL && fillmode != NC_NOFILL)
return NC_EINVAL;
2010-06-03 21:24:43 +08:00
/* If the user wants to know, tell him what the old mode was. */
if (old_modep)
*old_modep = nc4_info->fill_mode;
2010-06-03 21:24:43 +08:00
nc4_info->fill_mode = fillmode;
2013-03-01 05:50:55 +08:00
return NC_NOERR;
2010-06-03 21:24:43 +08:00
}
2017-12-05 03:21:14 +08:00
/**
* @internal Put the file back in redef mode. This is done
* automatically for netcdf-4 files, if the user forgets.
2017-12-05 03:21:14 +08:00
*
* @param ncid File and group ID.
*
* @return ::NC_NOERR No error.
* @author Ed Hartnett
*/
2010-06-03 21:24:43 +08:00
int
NC4_redef(int ncid)
{
NC_FILE_INFO_T *nc4_info;
int retval;
2010-06-03 21:24:43 +08:00
LOG((1, "%s: ncid 0x%x", __func__, ncid));
2010-06-03 21:24:43 +08:00
/* Find this file's metadata. */
if ((retval = nc4_find_grp_h5(ncid, NULL, &nc4_info)))
return retval;
assert(nc4_info);
2010-06-03 21:24:43 +08:00
/* If we're already in define mode, return an error for classic
* files, or netCDF/HDF5 files when classic mode is in use. */
if (nc4_info->flags & NC_INDEF)
return (nc4_info->cmode & NC_CLASSIC_MODEL) ? NC_EINDEFINE : NC_NOERR;
2010-06-03 21:24:43 +08:00
/* If the file is read-only, return an error. */
if (nc4_info->no_write)
return NC_EPERM;
2010-06-03 21:24:43 +08:00
/* Set define mode. */
nc4_info->flags |= NC_INDEF;
2010-06-03 21:24:43 +08:00
/* For nc_abort, we need to remember if we're in define mode as a
redef. */
nc4_info->redef = NC_TRUE;
2010-06-03 21:24:43 +08:00
return NC_NOERR;
2010-06-03 21:24:43 +08:00
}
2018-09-15 01:39:57 +08:00
/**
 * @internal Variant of enddef that accepts four extra size
 * parameters. For netCDF-4 files none of them is used; the call is
 * forwarded to NC4_enddef() with the ncid only.
 *
 * @param ncid File and group ID.
 * @param h_minfree Ignored for netCDF-4 files.
 * @param v_align Ignored for netCDF-4 files.
 * @param v_minfree Ignored for netCDF-4 files.
 * @param r_align Ignored for netCDF-4 files.
 *
 * @return ::NC_NOERR No error.
 * @return Any error from NC4_enddef() is passed through.
 * @author Ed Hartnett
 */
int
NC4__enddef(int ncid, size_t h_minfree, size_t v_align,
            size_t v_minfree, size_t r_align)
{
    /* Deliberately unused. */
    (void)h_minfree;
    (void)v_align;
    (void)v_minfree;
    (void)r_align;

    return NC4_enddef(ncid);
}
2017-12-05 03:21:14 +08:00
/**
* @internal Take the file out of define mode. This is called
* automatically for netcdf-4 files, if the user forgets.
2017-12-05 03:21:14 +08:00
*
* @param ncid File and group ID.
*
* @return ::NC_NOERR No error.
2018-08-07 00:49:31 +08:00
* @return ::NC_EBADID Bad ncid.
* @return ::NC_EBADGRPID Bad group ID.
2017-12-05 03:21:14 +08:00
* @author Ed Hartnett
*/
2018-08-21 22:40:53 +08:00
static int
NC4_enddef(int ncid)
2010-06-03 21:24:43 +08:00
{
NC_FILE_INFO_T *nc4_info;
NC_GRP_INFO_T *grp;
int retval;
Fix various problem around VLEN's re: https://github.com/Unidata/netcdf-c/issues/541 re: https://github.com/Unidata/netcdf-c/issues/1208 re: https://github.com/Unidata/netcdf-c/issues/2078 re: https://github.com/Unidata/netcdf-c/issues/2041 re: https://github.com/Unidata/netcdf-c/issues/2143 For a long time, there have been known problems with the management of complex types containing VLENs. This also involves the string type because it is stored as a VLEN of chars. This PR (mostly) fixes this problem. But note that it adds new functions to netcdf.h (see below) and this may require bumping the .so number. These new functions can be removed, if desired, in favor of functions in netcdf_aux.h, but netcdf.h seems the better place for them because they are intended as alternatives to the nc_free_vlen and nc_free_string functions already in netcdf.h. The term complex type refers to any type that directly or transitively references a VLEN type. So an array of VLENS, a compound with a VLEN field, and so on. In order to properly handle instances of these complex types, it is necessary to have function that can recursively walk instances of such types to perform various actions on them. The term "deep" is also used to mean recursive. At the moment, the two operations needed by the netcdf library are: * free'ing an instance of the complex type * copying an instance of the complex type. The current library does only shallow free and shallow copy of complex types. This means that only the top level is properly free'd or copied, but deep internal blocks in the instance are not touched. Note that the term "vector" will be used to mean a contiguous (in memory) sequence of instances of some type. Given an array with, say, dimensions 2 X 3 X 4, this will be stored in memory as a vector of length 2*3*4=24 instances. The use cases are primarily these. ## nc_get_vars Suppose one is reading a vector of instances using nc_get_vars (or nc_get_vara or nc_get_var, etc.). 
These functions will return the vector in the top-level memory provided. All interior blocks (form nested VLEN or strings) will have been dynamically allocated. After using this vector of instances, it is necessary to free (aka reclaim) the dynamically allocated memory, otherwise a memory leak occurs. So, the recursive reclaim function is used to walk the returned instance vector and do a deep reclaim of the data. Currently functions are defined in netcdf.h that are supposed to handle this: nc_free_vlen(), nc_free_vlens(), and nc_free_string(). Unfortunately, these functions only do a shallow free, so deeply nested instances are not properly handled by them. Note that internally, the provided data is immediately written so there is no need to copy it. But the caller may need to reclaim the data it passed into the function. ## nc_put_att Suppose one is writing a vector of instances as the data of an attribute using, say, nc_put_att. Internally, the incoming attribute data must be copied and stored so that changes/reclamation of the input data will not affect the attribute. Again, the code inside the netcdf library does only shallow copying rather than deep copy. As a result, one sees effects such as described in Github Issue https://github.com/Unidata/netcdf-c/issues/2143. Also, after defining the attribute, it may be necessary for the user to free the data that was provided as input to nc_put_att(). ## nc_get_att Suppose one is reading a vector of instances as the data of an attribute using, say, nc_get_att. Internally, the existing attribute data must be copied and returned to the caller, and the caller is responsible for reclaiming the returned data. Again, the code inside the netcdf library does only shallow copying rather than deep copy. So this can lead to memory leaks and errors because the deep data is shared between the library and the user. # Solution The solution is to build properly recursive reclaim and copy functions and use those as needed. 
These recursive functions are defined in libdispatch/dinstance.c and their signatures are defined in include/netcdf.h. For back compatibility, corresponding "ncaux_XXX" functions are defined in include/netcdf_aux.h. ```` int nc_reclaim_data(int ncid, nc_type xtypeid, void* memory, size_t count); int nc_reclaim_data_all(int ncid, nc_type xtypeid, void* memory, size_t count); int nc_copy_data(int ncid, nc_type xtypeid, const void* memory, size_t count, void* copy); int nc_copy_data_all(int ncid, nc_type xtypeid, const void* memory, size_t count, void** copyp); ```` There are two variants. The first two, nc_reclaim_data() and nc_copy_data(), assume the top-level vector is managed by the caller. For reclaim, this is so the user can use, for example, a statically allocated vector. For copy, it assumes the user provides the space into which the copy is stored. The second two, nc_reclaim_data_all() and nc_copy_data_all(), allows the functions to manage the top-level. So for nc_reclaim_data_all, the top level is assumed to be dynamically allocated and will be free'd by nc_reclaim_data_all(). The nc_copy_data_all() function will allocate the top level and return a pointer to it to the user. The user can later pass that pointer to nc_reclaim_data_all() to reclaim the instance(s). # Internal Changes The netcdf-c library internals are changed to use the proper reclaim and copy functions. It turns out that the places where these functions are needed is quite pervasive in the netcdf-c library code. Using these functions also allows some simplification of the code since the stdata and vldata fields of NC_ATT_INFO are no longer needed. Currently this is commented out using the SEPDATA \#define macro. When any bugs are largely fixed, all this code will be removed. # Known Bugs 1. There is still one known failure that has not been solved. All the failures revolve around some variant of this .cdl file. The proximate cause of failure is the use of a VLEN FillValue. 
```` netcdf x { types: float(*) row_of_floats ; dimensions: m = 5 ; variables: row_of_floats ragged_array(m) ; row_of_floats ragged_array:_FillValue = {-999} ; data: ragged_array = {10, 11, 12, 13, 14}, {20, 21, 22, 23}, {30, 31, 32}, {40, 41}, _ ; } ```` When a solution is found, I will either add it to this PR or post a new PR. # Related Changes * Mark nc_free_vlen(s) as deprecated in favor of ncaux_reclaim_data. * Remove the --enable-unfixed-memory-leaks option. * Remove the NC_VLENS_NOTEST code that suppresses some vlen tests. * Document this change in docs/internal.md * Disable the tst_vlen_data test in ncdump/tst_nccopy4.sh. * Mark types as fixed size or not (transitively) to optimize the reclaim and copy functions. # Misc. Changes * Make Doxygen process libdispatch/daux.c * Make sure the NC_ATT_INFO_T.container field is set.
2022-01-09 09:30:00 +08:00
int i;
NC_VAR_INFO_T* var = NULL;
LOG((1, "%s: ncid 0x%x", __func__, ncid));
/* Find pointer to group and nc4_info. */
if ((retval = nc4_find_grp_h5(ncid, &grp, &nc4_info)))
return retval;
Fix various problem around VLEN's re: https://github.com/Unidata/netcdf-c/issues/541 re: https://github.com/Unidata/netcdf-c/issues/1208 re: https://github.com/Unidata/netcdf-c/issues/2078 re: https://github.com/Unidata/netcdf-c/issues/2041 re: https://github.com/Unidata/netcdf-c/issues/2143 For a long time, there have been known problems with the management of complex types containing VLENs. This also involves the string type because it is stored as a VLEN of chars. This PR (mostly) fixes this problem. But note that it adds new functions to netcdf.h (see below) and this may require bumping the .so number. These new functions can be removed, if desired, in favor of functions in netcdf_aux.h, but netcdf.h seems the better place for them because they are intended as alternatives to the nc_free_vlen and nc_free_string functions already in netcdf.h. The term complex type refers to any type that directly or transitively references a VLEN type. So an array of VLENS, a compound with a VLEN field, and so on. In order to properly handle instances of these complex types, it is necessary to have function that can recursively walk instances of such types to perform various actions on them. The term "deep" is also used to mean recursive. At the moment, the two operations needed by the netcdf library are: * free'ing an instance of the complex type * copying an instance of the complex type. The current library does only shallow free and shallow copy of complex types. This means that only the top level is properly free'd or copied, but deep internal blocks in the instance are not touched. Note that the term "vector" will be used to mean a contiguous (in memory) sequence of instances of some type. Given an array with, say, dimensions 2 X 3 X 4, this will be stored in memory as a vector of length 2*3*4=24 instances. The use cases are primarily these. ## nc_get_vars Suppose one is reading a vector of instances using nc_get_vars (or nc_get_vara or nc_get_var, etc.). 
These functions will return the vector in the top-level memory provided. All interior blocks (from nested VLENs or strings) will have been dynamically allocated. After using this vector of instances, it is necessary to free (aka reclaim) the dynamically allocated memory, otherwise a memory leak occurs. So, the recursive reclaim function is used to walk the returned instance vector and do a deep reclaim of the data. Currently, functions are defined in netcdf.h that are supposed to handle this: nc_free_vlen(), nc_free_vlens(), and nc_free_string(). Unfortunately, these functions only do a shallow free, so deeply nested instances are not properly handled by them. Note that internally, the provided data is immediately written so there is no need to copy it. But the caller may need to reclaim the data it passed into the function. ## nc_put_att Suppose one is writing a vector of instances as the data of an attribute using, say, nc_put_att. Internally, the incoming attribute data must be copied and stored so that changes/reclamation of the input data will not affect the attribute. Again, the code inside the netcdf library does only shallow copying rather than deep copy. As a result, one sees effects such as described in Github Issue https://github.com/Unidata/netcdf-c/issues/2143. Also, after defining the attribute, it may be necessary for the user to free the data that was provided as input to nc_put_att(). ## nc_get_att Suppose one is reading a vector of instances as the data of an attribute using, say, nc_get_att. Internally, the existing attribute data must be copied and returned to the caller, and the caller is responsible for reclaiming the returned data. Again, the code inside the netcdf library does only shallow copying rather than deep copy. So this can lead to memory leaks and errors because the deep data is shared between the library and the user. # Solution The solution is to build properly recursive reclaim and copy functions and use those as needed. 
These recursive functions are defined in libdispatch/dinstance.c and their signatures are defined in include/netcdf.h. For back compatibility, corresponding "ncaux_XXX" functions are defined in include/netcdf_aux.h. ```` int nc_reclaim_data(int ncid, nc_type xtypeid, void* memory, size_t count); int nc_reclaim_data_all(int ncid, nc_type xtypeid, void* memory, size_t count); int nc_copy_data(int ncid, nc_type xtypeid, const void* memory, size_t count, void* copy); int nc_copy_data_all(int ncid, nc_type xtypeid, const void* memory, size_t count, void** copyp); ```` There are two variants. The first two, nc_reclaim_data() and nc_copy_data(), assume the top-level vector is managed by the caller. For reclaim, this is so the user can use, for example, a statically allocated vector. For copy, it assumes the user provides the space into which the copy is stored. The second two, nc_reclaim_data_all() and nc_copy_data_all(), allows the functions to manage the top-level. So for nc_reclaim_data_all, the top level is assumed to be dynamically allocated and will be free'd by nc_reclaim_data_all(). The nc_copy_data_all() function will allocate the top level and return a pointer to it to the user. The user can later pass that pointer to nc_reclaim_data_all() to reclaim the instance(s). # Internal Changes The netcdf-c library internals are changed to use the proper reclaim and copy functions. It turns out that the places where these functions are needed is quite pervasive in the netcdf-c library code. Using these functions also allows some simplification of the code since the stdata and vldata fields of NC_ATT_INFO are no longer needed. Currently this is commented out using the SEPDATA \#define macro. When any bugs are largely fixed, all this code will be removed. # Known Bugs 1. There is still one known failure that has not been solved. All the failures revolve around some variant of this .cdl file. The proximate cause of failure is the use of a VLEN FillValue. 
```` netcdf x { types: float(*) row_of_floats ; dimensions: m = 5 ; variables: row_of_floats ragged_array(m) ; row_of_floats ragged_array:_FillValue = {-999} ; data: ragged_array = {10, 11, 12, 13, 14}, {20, 21, 22, 23}, {30, 31, 32}, {40, 41}, _ ; } ```` When a solution is found, I will either add it to this PR or post a new PR. # Related Changes * Mark nc_free_vlen(s) as deprecated in favor of ncaux_reclaim_data. * Remove the --enable-unfixed-memory-leaks option. * Remove the NC_VLENS_NOTEST code that suppresses some vlen tests. * Document this change in docs/internal.md * Disable the tst_vlen_data test in ncdump/tst_nccopy4.sh. * Mark types as fixed size or not (transitively) to optimize the reclaim and copy functions. # Misc. Changes * Make Doxygen process libdispatch/daux.c * Make sure the NC_ATT_INFO_T.container field is set.
2022-01-09 09:30:00 +08:00
/* Why is this here? Especially since it is not recursive, so it
only applies to this group. */
/* When exiting define mode, mark all variable written. */
for (i = 0; i < ncindexsize(grp->vars); i++)
{
var = (NC_VAR_INFO_T *)ncindexith(grp->vars, i);
assert(var);
var->written_to = NC_TRUE;
}
return nc4_enddef_netcdf4_file(nc4_info);
2010-06-03 21:24:43 +08:00
}
2017-12-05 03:21:14 +08:00
/**
* @internal Flushes all buffers associated with the file, after
* writing all changed metadata. This may only be called in data mode.
*
* @param ncid File and group ID.
*
* @return ::NC_NOERR No error.
2018-08-22 00:20:32 +08:00
* @return ::NC_EBADID Bad ncid.
* @return ::NC_EINDEFINE Classic model file is in define mode.
2017-12-05 03:21:14 +08:00
* @author Ed Hartnett
*/
2010-06-03 21:24:43 +08:00
int
NC4_sync(int ncid)
{
NC_FILE_INFO_T *nc4_info;
int retval;
2010-06-03 21:24:43 +08:00
LOG((2, "%s: ncid 0x%x", __func__, ncid));
2010-06-03 21:24:43 +08:00
if ((retval = nc4_find_grp_h5(ncid, NULL, &nc4_info)))
return retval;
assert(nc4_info);
2010-06-03 21:24:43 +08:00
/* If we're in define mode, we can't sync. */
if (nc4_info->flags & NC_INDEF)
{
if (nc4_info->cmode & NC_CLASSIC_MODEL)
return NC_EINDEFINE;
if ((retval = NC4_enddef(ncid)))
return retval;
}
2010-06-03 21:24:43 +08:00
return sync_netcdf4_file(nc4_info);
2010-06-03 21:24:43 +08:00
}
2017-12-05 03:21:14 +08:00
/**
* @internal From the netcdf-3 docs: The function nc_abort just closes
* the netCDF dataset, if not in define mode. If the dataset is being
* created and is still in define mode, the dataset is deleted. If
* define mode was entered by a call to nc_redef, the netCDF dataset
* is restored to its state before definition mode was entered and the
* dataset is closed.
2017-12-05 03:21:14 +08:00
*
* @param ncid File and group ID.
*
* @return ::NC_NOERR No error.
* @author Ed Hartnett
*/
2010-06-03 21:24:43 +08:00
int
NC4_abort(int ncid)
{
NC *nc;
NC_FILE_INFO_T *nc4_info;
int delete_file = 0;
char path[NC_MAX_NAME + 1];
int retval;
LOG((2, "%s: ncid 0x%x", __func__, ncid));
/* Find metadata for this file. */
if ((retval = nc4_find_nc_grp_h5(ncid, &nc, NULL, &nc4_info)))
return retval;
assert(nc4_info);
/* If we're in define mode, but not redefing the file, delete it. */
if (nc4_info->flags & NC_INDEF && !nc4_info->redef)
{
delete_file++;
strncpy(path, nc->path, NC_MAX_NAME);
}
/* Free any resources the netcdf-4 library has for this file's
* metadata. */
if ((retval = nc4_close_hdf5_file(nc4_info, 1, NULL)))
return retval;
/* Delete the file, if we should. */
if (delete_file)
if (remove(path) < 0)
return NC_ECANTREMOVE;
return NC_NOERR;
2010-06-03 21:24:43 +08:00
}
2017-12-05 03:21:14 +08:00
/**
* @internal Close the netcdf file, writing any changes first.
2017-12-05 03:21:14 +08:00
*
* @param ncid File and group ID.
* @param params any extra parameters in/out of close
2017-12-05 03:21:14 +08:00
*
* @return ::NC_NOERR No error.
* @author Ed Hartnett
*/
2010-06-03 21:24:43 +08:00
int
NC4_close(int ncid, void* params)
2010-06-03 21:24:43 +08:00
{
NC_GRP_INFO_T *grp;
NC_FILE_INFO_T *h5;
int retval;
int inmemory;
NC_memio* memio = NULL;
2010-06-03 21:24:43 +08:00
LOG((1, "%s: ncid 0x%x", __func__, ncid));
2010-06-03 21:24:43 +08:00
/* Find our metadata for this file. */
if ((retval = nc4_find_grp_h5(ncid, &grp, &h5)))
return retval;
2010-06-03 21:24:43 +08:00
assert(h5 && grp);
2010-11-30 06:23:16 +08:00
/* This must be the root group. */
if (grp->parent)
return NC_EBADGRPID;
2010-11-30 06:23:16 +08:00
inmemory = ((h5->cmode & NC_INMEMORY) == NC_INMEMORY);
if(inmemory && params != NULL) {
memio = (NC_memio*)params;
}
/* Call the nc4 close. */
if ((retval = nc4_close_hdf5_file(grp->nc4_info, 0, memio)))
return retval;
return NC_NOERR;
}
2017-12-05 03:21:14 +08:00
/**
* @internal Learn number of dimensions, variables, global attributes,
* and the ID of the first unlimited dimension (if any).
*
* @note It's possible for any of these pointers to be NULL, in which
* case don't try to figure out that value.
*
* @param ncid File and group ID.
* @param ndimsp Pointer that gets number of dimensions.
* @param nvarsp Pointer that gets number of variables.
* @param nattsp Pointer that gets number of global attributes.
* @param unlimdimidp Pointer that gets first unlimited dimension ID,
* or -1 if there are no unlimied dimensions.
*
* @return ::NC_NOERR No error.
* @author Ed Hartnett
*/
2010-06-03 21:24:43 +08:00
int
NC4_inq(int ncid, int *ndimsp, int *nvarsp, int *nattsp, int *unlimdimidp)
{
NC *nc;
NC_FILE_INFO_T *h5;
NC_GRP_INFO_T *grp;
int retval;
int i;
LOG((2, "%s: ncid 0x%x", __func__, ncid));
/* Find file metadata. */
if ((retval = nc4_find_nc_grp_h5(ncid, &nc, &grp, &h5)))
return retval;
assert(h5 && grp && nc);
/* Count the number of dims, vars, and global atts; need to iterate
* because of possible nulls. */
if (ndimsp)
{
*ndimsp = ncindexcount(grp->dim);
}
if (nvarsp)
{
*nvarsp = ncindexcount(grp->vars);
}
if (nattsp)
{
/* Do we need to read the atts? */
if (!grp->atts_read)
if ((retval = nc4_read_atts(grp, NULL)))
return retval;
*nattsp = ncindexcount(grp->att);
}
if (unlimdimidp)
{
/* Default, no unlimited dimension */
*unlimdimidp = -1;
/* If there's more than one unlimited dim, which was not possible
with netcdf-3, then only the last unlimited one will be reported
back in xtendimp. */
/* Note that this code is inconsistent with nc_inq_unlimid() */
for(i=0;i<ncindexsize(grp->dim);i++) {
NC_DIM_INFO_T* d = (NC_DIM_INFO_T*)ncindexith(grp->dim,i);
if(d == NULL) continue;
if(d->unlimited) {
*unlimdimidp = d->hdr.id;
break;
}
}
}
2018-06-19 19:05:44 +08:00
return NC_NOERR;
2010-06-03 21:24:43 +08:00
}
2017-12-05 03:21:14 +08:00
/**
* @internal This function will do the enddef stuff for a netcdf-4 file.
2017-12-05 03:21:14 +08:00
*
* @param h5 Pointer to HDF5 file info struct.
*
* @return ::NC_NOERR No error.
2018-08-07 00:49:31 +08:00
* @return ::NC_ENOTINDEFINE Not in define mode.
2017-12-05 03:21:14 +08:00
* @author Ed Hartnett
*/
2010-06-03 21:24:43 +08:00
int
nc4_enddef_netcdf4_file(NC_FILE_INFO_T *h5)
2010-06-03 21:24:43 +08:00
{
assert(h5);
LOG((3, "%s", __func__));
2010-06-03 21:24:43 +08:00
/* If we're not in define mode, return an error. */
if (!(h5->flags & NC_INDEF))
return NC_ENOTINDEFINE;
2010-06-03 21:24:43 +08:00
/* Turn define mode off. */
h5->flags ^= NC_INDEF;
2010-06-03 21:24:43 +08:00
/* Redef mode needs to be tracked separately for nc_abort. */
h5->redef = NC_FALSE;
2010-06-03 21:24:43 +08:00
return sync_netcdf4_file(h5);
2010-06-03 21:24:43 +08:00
}