netcdf-c/ncdump/ncdump.c

2574 lines
71 KiB
C
Raw Normal View History

/*! \file
Copyright 2018 University Corporation for Atmospheric
Research/Unidata. See \ref copyright file for more info. */
2010-06-03 21:24:43 +08:00
Primary change: add dap4 support Specific changes: 1. Add dap4 code: libdap4 and dap4_test. Note that until the d4ts server problem is solved, dap4 is turned off. 2. Modify various files to support dap4 flags: configure.ac, Makefile.am, CMakeLists.txt, etc. 3. Add nc_test/test_common.sh. This centralizes the handling of the locations of various things in the build tree: e.g. where is ncgen.exe located. See nc_test/test_common.sh for details. 4. Modify .sh files to use test_common.sh 5. Obsolete separate oc2 by moving it to be part of netcdf-c. This means replacing code with netcdf-c equivalents. 5. Add --with-testserver to configure.ac to allow override of the servers to be used for --enable-dap-remote-tests. 6. There were multiple versions of nctypealignment code. Try to centralize in libdispatch/doffset.c and include/ncoffsets.h 7. Add a unit test for the ncuri code because of its complexity. 8. Move the findserver code out of libdispatch and into a separate, self contained program in ncdap_test and dap4_test. 9. Move the dispatch header files (nc{3,4}dispatch.h) to .../include because they are now shared by modules. 10. Revamp the handling of TOPSRCDIR and TOPBUILDDIR for shell scripts. 11. Make use of MREMAP if available 12. Misc. minor changes e.g. - #include <config.h> -> #include "config.h" - Add some no-install headers to /include - extern -> EXTERNL and vice versa as needed - misc header cleanup - clean up checking for misc. unix vs microsoft functions 13. Change copyright decls in some files to point to LICENSE file. 14. Add notes to RELEASENOTES.md
2017-03-09 08:01:10 +08:00
#include "config.h"
2010-06-03 21:24:43 +08:00
#include <stdio.h>
#ifdef HAVE_GETOPT_H
#include <getopt.h>
#endif
#if defined(_WIN32) && !defined(__MINGW32__)
#include "XGetopt.h"
#endif
#ifdef HAVE_UNISTD_H
2010-06-03 21:24:43 +08:00
#include <unistd.h>
#endif
#ifdef HAVE_FCNTL_H
#include <fcntl.h>
#endif
2012-09-11 06:37:38 +08:00
2010-06-03 21:24:43 +08:00
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <assert.h>
#include <math.h>
Primary change: add dap4 support Specific changes: 1. Add dap4 code: libdap4 and dap4_test. Note that until the d4ts server problem is solved, dap4 is turned off. 2. Modify various files to support dap4 flags: configure.ac, Makefile.am, CMakeLists.txt, etc. 3. Add nc_test/test_common.sh. This centralizes the handling of the locations of various things in the build tree: e.g. where is ncgen.exe located. See nc_test/test_common.sh for details. 4. Modify .sh files to use test_common.sh 5. Obsolete separate oc2 by moving it to be part of netcdf-c. This means replacing code with netcdf-c equivalents. 5. Add --with-testserver to configure.ac to allow override of the servers to be used for --enable-dap-remote-tests. 6. There were multiple versions of nctypealignment code. Try to centralize in libdispatch/doffset.c and include/ncoffsets.h 7. Add a unit test for the ncuri code because of its complexity. 8. Move the findserver code out of libdispatch and into a separate, self contained program in ncdap_test and dap4_test. 9. Move the dispatch header files (nc{3,4}dispatch.h) to .../include because they are now shared by modules. 10. Revamp the handling of TOPSRCDIR and TOPBUILDDIR for shell scripts. 11. Make use of MREMAP if available 12. Misc. minor changes e.g. - #include <config.h> -> #include "config.h" - Add some no-install headers to /include - extern -> EXTERNL and vice versa as needed - misc header cleanup - clean up checking for misc. unix vs microsoft functions 13. Change copyright decls in some files to point to LICENSE file. 14. Add notes to RELEASENOTES.md
2017-03-09 08:01:10 +08:00
#include "netcdf.h"
#include "netcdf_mem.h"
Add support for multiple filters per variable. re: https://github.com/Unidata/netcdf-c/issues/1584 Support has been added for multiple filters per variable. This affects a number of components in netcdf. The new APIs are documented in NUG/filters.md. The primary changes are: * A set of new functions are provided (see __include/netcdf_filter.h__). - Obtain a list of the filters associated with a variable - Obtain the parameters for a specific filter. * The existing __nc_inq_var_filter__ function now returns info about the first defined filter. * The utilities (ncgen, ncdump, and nccopy) now support an extended format for specifying a sequence of filters. The general form is __<filter>|<filter>..._. * The ncdump **_Filter** attribute now dumps a list of all the filters associated with a variable using the above new format. * Filter specifications can now use a filter name instead of number for filters known to the netcdf library, which in turn is taken from the HDF5 filter registration page. * New errors are defined: NC_EFILTER and NC_ENOFILTER. The latter is returned if an attempt is made to access an unknown filter. * Internally, the dispatch table has been extended to add a function to handle all of the filter functions. * New, filter-related, tests were added to nc_test4. * A new plugin was added to the plugins directory to help with testing. Notes: 1. The shuffle and fletcher32 filters are not part of the multifilter system. Misc. changes: 1. A debug module was added to libhdf5 to help catch error locations.
2020-02-17 03:59:33 +08:00
#include "netcdf_filter.h"
#include "netcdf_aux.h"
#include "utils.h"
#include "nccomps.h"
2011-09-19 04:57:51 +08:00
#include "nctime0.h" /* new iso time and calendar stuff */
2010-06-03 21:24:43 +08:00
#include "dumplib.h"
2011-09-22 07:10:03 +08:00
#include "ncdump.h"
2010-06-03 21:24:43 +08:00
#include "vardata.h"
#include "indent.h"
#include "isnan.h"
#include "cdl.h"
#include "nclog.h"
#include "ncpathmgr.h"
#include "nclist.h"
This PR adds EXPERIMENTAL support for accessing data in the cloud using a variant of the Zarr protocol and storage format. This enhancement is generically referred to as "NCZarr". The data model supported by NCZarr is netcdf-4 minus the user-defined types and the String type. In this sense it is similar to the CDF-5 data model. More detailed information about enabling and using NCZarr is described in the document NUG/nczarr.md and in a [Unidata Developer's blog entry](https://www.unidata.ucar.edu/blogs/developer/en/entry/overview-of-zarr-support-in). WARNING: this code has had limited testing, so do not use this version for production work. Also, performance improvements are ongoing. Note especially the following platform matrix of successful tests: Platform | Build System | S3 support ------------------------------------ Linux+gcc | Automake | yes Linux+gcc | CMake | yes Visual Studio | CMake | no Additionally, and as a consequence of the addition of NCZarr, major changes have been made to the Filter API. NOTE: NCZarr does not yet support filters, but these changes are enablers for that support in the future. Note that it is possible (probable?) that there will be some accidental reversions if the changes here did not correctly mimic the existing filter testing. In any case, previously filter ids and parameters were of type unsigned int. In order to support the more general zarr filter model, this was all converted to char*. The old HDF5-specific, unsigned int operations are still supported but they are wrappers around the new, char* based nc_filterx_XXX functions. This entailed at least the following changes: 1. Added the files libdispatch/dfilterx.c and include/ncfilter.h 2. Some filterx utilities have been moved to libdispatch/daux.c 3. A new entry, "filter_actions" was added to the NCDispatch table and the version bumped. 4. 
An overly complex set of structs was created to support funnelling all of the filterx operations thru a single dispatch "filter_actions" entry. 5. Move common code from libhdf5 to libsrc4 so that it is accessible to nczarr. Changes directly related to Zarr: 1. Modified CMakeList.txt and configure.ac to support both C and C++ -- this is in support of S3 support via the aws-sdk libraries. 2. Define a size64_t type to support nczarr. 3. More reworking of libdispatch/dinfermodel.c to support zarr and to regularize the structure of the fragments section of a URL. Changes not directly related to Zarr: 1. Make client-side filter registration be conditional, with default off. 2. Hack include/nc4internal.h to make some flags added by Ed be unique: e.g. NC_CREAT, NC_INDEF, etc. 3. cleanup include/nchttp.h and libdispatch/dhttp.c. 4. Misc. changes to support compiling under Visual Studio including: * Better testing under windows for dirent.h and opendir and closedir. 5. Misc. changes to the oc2 code to support various libcurl CURLOPT flags and to centralize error reporting. 6. By default, suppress the vlen tests that have unfixed memory leaks; add option to enable them. 7. Make part of the nc_test/test_byterange.sh test be contingent on remotetest.unidata.ucar.edu being accessible. Changes Left TO-DO: 1. fix provenance code, it is too HDF5 specific.
2020-06-29 08:02:47 +08:00
#include "ncuri.h"
#include "nc_provenance.h"
Codify cross-platform file paths The netcdf-c code has to deal with a variety of platforms: Windows, OSX, Linux, Cygwin, MSYS, etc. These platforms differ significantly in the kind of file paths that they accept. So in order to handle this, I have created a set of replacements for the most common file system operations such as _open_ or _fopen_ or _access_ to manage the file path differences correctly. A more limited version of this idea was already implemented via the ncwinpath.h and dwinpath.c code. So this can be viewed as a replacement for that code. And in path in many cases, the only change that was required was to replace '#include <ncwinpath.h>' with '#include <ncpathmgt.h>' and then replace file operation calls with the NCxxx equivalent from ncpathmgr.h Note that recently, the ncwinpath.h was renamed ncpathmgmt.h, so this pull request should not require dealing with winpath. The heart of the change is include/ncpathmgmt.h, which provides alternate operations such as NCfopen or NCaccess and which properly parse and rebuild path arguments to work for the platform on which the code is executing. This mostly matters for Windows because of the way that it uses backslash and drive letters, as compared to *nix*. One important feature is that the user can do string manipulations on a file path without having to worry too much about the platform because the path management code will properly handle most mixed cases. So one can for example concatenate a path suffix that uses forward slashes to a Windows path and have it work correctly. The conversion code is in libdispatch/dpathmgr.c, and the important function there is NCpathcvt which does the proper conversions to the local path format. As a rule, most code should just replace their file operations with the corresponding NCxxx ones defined in include/ncpathmgmt.h. These NCxxx functions all call NCpathcvt on their path arguments before executing the actual file operation. 
In some rare cases, the client may need to directly use NCpathcvt, but this should be avoided as much as possible. If there is a need for supporting a new file operation not already in ncpathmgmt.h, then use the code in dpathmgr.c as a template. Also please notify Unidata so we can include it as a formal part or our supported operations. Also, if you see an operation in the library that is not using the NCxxx form, then please submit an issue so we can fix it. Misc. Changes: * Clean up the utf8 testing code; it is impossible to get some tests to work under windows using shell scripts; the args do not pass as utf8 but as some other encoding. * Added an extra utf8 test case: test_unicode_path.sh * Add a true test for HDF5 1.10.6 or later because as noted in PR https://github.com/Unidata/netcdf-c/pull/1794, HDF5 changed its Windows file path handling.
2021-03-05 04:41:31 +08:00
#include "ncpathmgr.h"
#ifdef USE_NETCDF4
#include "nc4internal.h" /* to get name of the special properties file */
#endif
#define XML_VERSION "1.0"
2010-06-03 21:24:43 +08:00
Regularize the scoping of dimensions This is a follow-on to pull request ````https://github.com/Unidata/netcdf-c/pull/1959````, which fixed up type scoping. The primary changes are to _nc\_inq\_dimid()_ and to ncdump. The _nc\_inq\_dimid()_ function is supposed to allow the name to be an FQN, but this apparently never got implemented. So it was modified to support FQNs. The ncdump program is supposed to output fully qualified dimension names in its generated CDL file under certain conditions. Suppose ncdump has a netcdf-4 file F with variable V, and V's parent group is G. For each dimension id D referenced by V, ncdump needs to determine whether to print its name as a simple name or as a fully qualified name (FQN). The algorithm is as follows: 1. Search up the tree of ancestor groups. 2. If one of those ancestor groups contains the dimid, then call it dimgrp. 3. If one of those ancestor groups contains a dim with the same name as the dimid, but with a different dimid, then record that as duplicate=true. 4. If dimgrp is defined and duplicate == false, then we do not need an fqn. 5. If dimgrp is defined and duplicate == true, then we do need an fqn to avoid incorrectly using the duplicate. 6. If dimgrp is undefined, then do a preorder breadth-first search of all the groups looking for the dimid. 7. If found, then use the fqn of the first found such dimension location. 8. If not found, then fail. Test case ncdump/test_scope.sh was modified to test the proper operation of ncdump and _nc\_inq\_dimid()_. Misc. Other Changes: * Fix nc_inq_ncid (NC4_inq_ncid actually) to return root group id if the name argument is NULL. * Modify _ncdump/printfqn_ to print out a dimid FQN; this supports verification that the resulting .nc files were properly created.
2021-06-01 05:51:12 +08:00
#define LPAREN "("
#define RPAREN ")"
2010-06-03 21:24:43 +08:00
/* Spell the 64-bit integer type names as plain (unsigned) long long via
   the preprocessor.
   NOTE(review): these are textual macro replacements, not typedefs, so any
   <stdint.h> typedefs of the same names seen after this point would be
   rewritten as well -- confirm no later header depends on the real ones. */
#define int64_t long long
#define uint64_t unsigned long long
/* Keyword strings that need special handling: when a variable is named
   after one of them, care is needed when printing that variable's
   attributes.  The table is NULL-terminated so that it can be scanned
   without a separate element count. */
static const char* keywords[] = {
    "variables",
    "dimensions",
    "data",
    "group",
    "types",
    NULL
};
Regularize the scoping of dimensions This is a follow-on to pull request ````https://github.com/Unidata/netcdf-c/pull/1959````, which fixed up type scoping. The primary changes are to _nc\_inq\_dimid()_ and to ncdump. The _nc\_inq\_dimid()_ function is supposed to allow the name to be an FQN, but this apparently never got implemented. So it was modified to support FQNs. The ncdump program is supposed to output fully qualified dimension names in its generated CDL file under certain conditions. Suppose ncdump has a netcdf-4 file F with variable V, and V's parent group is G. For each dimension id D referenced by V, ncdump needs to determine whether to print its name as a simple name or as a fully qualified name (FQN). The algorithm is as follows: 1. Search up the tree of ancestor groups. 2. If one of those ancestor groups contains the dimid, then call it dimgrp. 3. If one of those ancestor groups contains a dim with the same name as the dimid, but with a different dimid, then record that as duplicate=true. 4. If dimgrp is defined and duplicate == false, then we do not need an fqn. 5. If dimgrp is defined and duplicate == true, then we do need an fqn to avoid incorrectly using the duplicate. 6. If dimgrp is undefined, then do a preorder breadth-first search of all the groups looking for the dimid. 7. If found, then use the fqn of the first found such dimension location. 8. If not found, then fail. Test case ncdump/test_scope.sh was modified to test the proper operation of ncdump and _nc\_inq\_dimid()_. Misc. Other Changes: * Fix nc_inq_ncid (NC4_inq_ncid actually) to return root group id if the name argument is NULL. * Modify _ncdump/printfqn_ to print out a dimid FQN; this supports verification that the resulting .nc files were properly created.
2021-06-01 05:51:12 +08:00
/* Forward declarations */

/* File-local helper whose definition is not in this part of the file.
   NOTE(review): judging by its name and signature it searches the group
   tree under ncid for dimid and reports the owning group through
   *parentidp -- confirm against the definition. */
static int searchgrouptreedim(int ncid, int dimid, int* parentidp);

/* Provided outside this file.  name_path() passes it the converted path
   plus the address of its result string, and treats a nonzero return as
   "the path looks like a URL". */
extern int nc__testurl(const char*,char**);
/* Report whether kw is exactly equal to one of the strings in the
   NULL-terminated keywords table.
   Returns 1 on a match, 0 otherwise. */
static int iskeyword(const char* kw)
{
    size_t idx = 0;

    while(keywords[idx] != NULL) {
        if(strcmp(kw, keywords[idx]) == 0)
            return 1;
        idx++;
    }
    return 0;
}
/* globals */
2010-06-03 21:24:43 +08:00
/* Name under which the program reports itself; usage() prints it at the
   start of the synopsis line. */
char *progname;
/* Formatting options for this run.  The values below are the defaults and
   are overridden by command-line options.  The initializer is positional,
   so the entries must stay in the same order as the members of fspec_t. */
fspec_t formatting_specs =
{
    0,			/* construct netcdf name from file name */
    false,		/* print header info only, no data? */
    false,		/* just print coord vars? */
    false,		/* brief comments in data section? */
    false,		/* full annotations in data section? */
    false,		/* human-readable output for date-time values? */
    false,		/* use 'T' separator between date and time values as strings? */
    false,		/* output special attributes, eg chunking? */
    LANG_C,		/* language conventions for indices */
    false,		/* for DAP URLs, client-side cache used */
    0,			/* if -v specified, number of variables in list */
    0,			/* if -v specified, list of variable names */
    0,			/* if -g specified, number of groups names in list */
    0,			/* if -g specified, list of group names */
    0,			/* if -g specified, list of matching grpids */
    0			/* kind of netCDF file */
};
2010-06-03 21:24:43 +08:00
static void
usage(void)
{
#define USAGE "\
[-c] Coordinate variable data and header information\n\
[-h] Header information only, no data\n\
[-v var1[,...]] Data for variable(s) <var1>,... only\n\
[-b [c|f]] Brief annotations for C or Fortran indices in data\n\
[-f [c|f]] Full annotations for C or Fortran indices in data\n\
[-l len] Line length maximum in data section (default 80)\n\
[-n name] Name for netCDF (default derived from file name)\n\
[-p n[,n]] Display floating-point values with less precision\n\
[-k] Output kind of netCDF file\n\
[-s] Output special (virtual) attributes\n\
[-t] Output time data as date-time strings\n\
[-i] Output time data as date-time strings with ISO-8601 'T' separator\n\
2012-05-02 06:17:59 +08:00
[-g grp1[,...]] Data and metadata for group(s) <grp1>,... only\n\
[-w] With client-side caching of variables for DAP URLs\n\
[-x] Output XML (NcML) instead of CDL\n\
[-Xp] Unconditionally suppress output of the properties attribute\n\
[-Ln] Set log level to n (>= 0); ignore if logging not enabled.\n\
file Name of netCDF file (or URL if DAP access enabled)\n"
2010-06-03 21:24:43 +08:00
(void) fprintf(stderr,
"%s [-c|-h] [-v ...] [[-b|-f] [c|f]] [-l len] [-n name] [-p n[,n]] [-k] [-x] [-s] [-t|-i] [-g ...] [-w] [-Ln] file\n%s",
2010-06-03 21:24:43 +08:00
progname,
USAGE);
2010-06-03 21:24:43 +08:00
(void) fprintf(stderr,
"netcdf library version %s\n",
nc_inq_libvers());
}
/*
* convert pathname of netcdf file into name for cdl unit, by taking
2010-06-03 21:24:43 +08:00
* last component of path and stripping off any extension.
* DMH: add code to handle OPeNDAP url.
* DMH: I think this also works for UTF8.
*/
static char *
name_path(const char *path)
{
Codify cross-platform file paths The netcdf-c code has to deal with a variety of platforms: Windows, OSX, Linux, Cygwin, MSYS, etc. These platforms differ significantly in the kind of file paths that they accept. So in order to handle this, I have created a set of replacements for the most common file system operations such as _open_ or _fopen_ or _access_ to manage the file path differences correctly. A more limited version of this idea was already implemented via the ncwinpath.h and dwinpath.c code. So this can be viewed as a replacement for that code. And in path in many cases, the only change that was required was to replace '#include <ncwinpath.h>' with '#include <ncpathmgt.h>' and then replace file operation calls with the NCxxx equivalent from ncpathmgr.h Note that recently, the ncwinpath.h was renamed ncpathmgmt.h, so this pull request should not require dealing with winpath. The heart of the change is include/ncpathmgmt.h, which provides alternate operations such as NCfopen or NCaccess and which properly parse and rebuild path arguments to work for the platform on which the code is executing. This mostly matters for Windows because of the way that it uses backslash and drive letters, as compared to *nix*. One important feature is that the user can do string manipulations on a file path without having to worry too much about the platform because the path management code will properly handle most mixed cases. So one can for example concatenate a path suffix that uses forward slashes to a Windows path and have it work correctly. The conversion code is in libdispatch/dpathmgr.c, and the important function there is NCpathcvt which does the proper conversions to the local path format. As a rule, most code should just replace their file operations with the corresponding NCxxx ones defined in include/ncpathmgmt.h. These NCxxx functions all call NCpathcvt on their path arguments before executing the actual file operation. 
In some rare cases, the client may need to directly use NCpathcvt, but this should be avoided as much as possible. If there is a need for supporting a new file operation not already in ncpathmgmt.h, then use the code in dpathmgr.c as a template. Also please notify Unidata so we can include it as a formal part or our supported operations. Also, if you see an operation in the library that is not using the NCxxx form, then please submit an issue so we can fix it. Misc. Changes: * Clean up the utf8 testing code; it is impossible to get some tests to work under windows using shell scripts; the args do not pass as utf8 but as some other encoding. * Added an extra utf8 test case: test_unicode_path.sh * Add a true test for HDF5 1.10.6 or later because as noted in PR https://github.com/Unidata/netcdf-c/pull/1794, HDF5 changed its Windows file path handling.
2021-03-05 04:41:31 +08:00
char* cvtpath = NULL;
const char *cp = NULL;
char *sp = NULL;
size_t cplen = 0;
char* base = NULL;
2010-06-03 21:24:43 +08:00
Codify cross-platform file paths The netcdf-c code has to deal with a variety of platforms: Windows, OSX, Linux, Cygwin, MSYS, etc. These platforms differ significantly in the kind of file paths that they accept. So in order to handle this, I have created a set of replacements for the most common file system operations such as _open_ or _fopen_ or _access_ to manage the file path differences correctly. A more limited version of this idea was already implemented via the ncwinpath.h and dwinpath.c code. So this can be viewed as a replacement for that code. And in path in many cases, the only change that was required was to replace '#include <ncwinpath.h>' with '#include <ncpathmgt.h>' and then replace file operation calls with the NCxxx equivalent from ncpathmgr.h Note that recently, the ncwinpath.h was renamed ncpathmgmt.h, so this pull request should not require dealing with winpath. The heart of the change is include/ncpathmgmt.h, which provides alternate operations such as NCfopen or NCaccess and which properly parse and rebuild path arguments to work for the platform on which the code is executing. This mostly matters for Windows because of the way that it uses backslash and drive letters, as compared to *nix*. One important feature is that the user can do string manipulations on a file path without having to worry too much about the platform because the path management code will properly handle most mixed cases. So one can for example concatenate a path suffix that uses forward slashes to a Windows path and have it work correctly. The conversion code is in libdispatch/dpathmgr.c, and the important function there is NCpathcvt which does the proper conversions to the local path format. As a rule, most code should just replace their file operations with the corresponding NCxxx ones defined in include/ncpathmgmt.h. These NCxxx functions all call NCpathcvt on their path arguments before executing the actual file operation. 
In some rare cases, the client may need to directly use NCpathcvt, but this should be avoided as much as possible. If there is a need for supporting a new file operation not already in ncpathmgmt.h, then use the code in dpathmgr.c as a template. Also please notify Unidata so we can include it as a formal part or our supported operations. Also, if you see an operation in the library that is not using the NCxxx form, then please submit an issue so we can fix it. Misc. Changes: * Clean up the utf8 testing code; it is impossible to get some tests to work under windows using shell scripts; the args do not pass as utf8 but as some other encoding. * Added an extra utf8 test case: test_unicode_path.sh * Add a true test for HDF5 1.10.6 or later because as noted in PR https://github.com/Unidata/netcdf-c/pull/1794, HDF5 changed its Windows file path handling.
2021-03-05 04:41:31 +08:00
if((cvtpath = NCpathcvt(path))==NULL)
return NULL;
2010-06-03 21:24:43 +08:00
/* See if this is a url */
Codify cross-platform file paths The netcdf-c code has to deal with a variety of platforms: Windows, OSX, Linux, Cygwin, MSYS, etc. These platforms differ significantly in the kind of file paths that they accept. So in order to handle this, I have created a set of replacements for the most common file system operations such as _open_ or _fopen_ or _access_ to manage the file path differences correctly. A more limited version of this idea was already implemented via the ncwinpath.h and dwinpath.c code. So this can be viewed as a replacement for that code. And in path in many cases, the only change that was required was to replace '#include <ncwinpath.h>' with '#include <ncpathmgt.h>' and then replace file operation calls with the NCxxx equivalent from ncpathmgr.h Note that recently, the ncwinpath.h was renamed ncpathmgmt.h, so this pull request should not require dealing with winpath. The heart of the change is include/ncpathmgmt.h, which provides alternate operations such as NCfopen or NCaccess and which properly parse and rebuild path arguments to work for the platform on which the code is executing. This mostly matters for Windows because of the way that it uses backslash and drive letters, as compared to *nix*. One important feature is that the user can do string manipulations on a file path without having to worry too much about the platform because the path management code will properly handle most mixed cases. So one can for example concatenate a path suffix that uses forward slashes to a Windows path and have it work correctly. The conversion code is in libdispatch/dpathmgr.c, and the important function there is NCpathcvt which does the proper conversions to the local path format. As a rule, most code should just replace their file operations with the corresponding NCxxx ones defined in include/ncpathmgmt.h. These NCxxx functions all call NCpathcvt on their path arguments before executing the actual file operation. 
In some rare cases, the client may need to directly use NCpathcvt, but this should be avoided as much as possible. If there is a need for supporting a new file operation not already in ncpathmgmt.h, then use the code in dpathmgr.c as a template. Also please notify Unidata so we can include it as a formal part or our supported operations. Also, if you see an operation in the library that is not using the NCxxx form, then please submit an issue so we can fix it. Misc. Changes: * Clean up the utf8 testing code; it is impossible to get some tests to work under windows using shell scripts; the args do not pass as utf8 but as some other encoding. * Added an extra utf8 test case: test_unicode_path.sh * Add a true test for HDF5 1.10.6 or later because as noted in PR https://github.com/Unidata/netcdf-c/pull/1794, HDF5 changed its Windows file path handling.
2021-03-05 04:41:31 +08:00
if(nc__testurl(cvtpath,&base))
goto done; /* Looks like a url */
/* else fall thru and treat like a file path */
2010-06-03 21:24:43 +08:00
Codify cross-platform file paths The netcdf-c code has to deal with a variety of platforms: Windows, OSX, Linux, Cygwin, MSYS, etc. These platforms differ significantly in the kind of file paths that they accept. So in order to handle this, I have created a set of replacements for the most common file system operations such as _open_ or _fopen_ or _access_ to manage the file path differences correctly. A more limited version of this idea was already implemented via the ncwinpath.h and dwinpath.c code. So this can be viewed as a replacement for that code. And in path in many cases, the only change that was required was to replace '#include <ncwinpath.h>' with '#include <ncpathmgt.h>' and then replace file operation calls with the NCxxx equivalent from ncpathmgr.h Note that recently, the ncwinpath.h was renamed ncpathmgmt.h, so this pull request should not require dealing with winpath. The heart of the change is include/ncpathmgmt.h, which provides alternate operations such as NCfopen or NCaccess and which properly parse and rebuild path arguments to work for the platform on which the code is executing. This mostly matters for Windows because of the way that it uses backslash and drive letters, as compared to *nix*. One important feature is that the user can do string manipulations on a file path without having to worry too much about the platform because the path management code will properly handle most mixed cases. So one can for example concatenate a path suffix that uses forward slashes to a Windows path and have it work correctly. The conversion code is in libdispatch/dpathmgr.c, and the important function there is NCpathcvt which does the proper conversions to the local path format. As a rule, most code should just replace their file operations with the corresponding NCxxx ones defined in include/ncpathmgmt.h. These NCxxx functions all call NCpathcvt on their path arguments before executing the actual file operation. 
In some rare cases, the client may need to directly use NCpathcvt, but this should be avoided as much as possible. If there is a need for supporting a new file operation not already in ncpathmgmt.h, then use the code in dpathmgr.c as a template. Also please notify Unidata so we can include it as a formal part or our supported operations. Also, if you see an operation in the library that is not using the NCxxx form, then please submit an issue so we can fix it. Misc. Changes: * Clean up the utf8 testing code; it is impossible to get some tests to work under windows using shell scripts; the args do not pass as utf8 but as some other encoding. * Added an extra utf8 test case: test_unicode_path.sh * Add a true test for HDF5 1.10.6 or later because as noted in PR https://github.com/Unidata/netcdf-c/pull/1794, HDF5 changed its Windows file path handling.
2021-03-05 04:41:31 +08:00
cp = strrchr(cvtpath, '/');
if (cp == NULL) /* no delimiter */
cp = cvtpath;
else /* skip delimiter */
2010-06-03 21:24:43 +08:00
cp++;
Codify cross-platform file paths The netcdf-c code has to deal with a variety of platforms: Windows, OSX, Linux, Cygwin, MSYS, etc. These platforms differ significantly in the kind of file paths that they accept. So in order to handle this, I have created a set of replacements for the most common file system operations such as _open_ or _fopen_ or _access_ to manage the file path differences correctly. A more limited version of this idea was already implemented via the ncwinpath.h and dwinpath.c code. So this can be viewed as a replacement for that code. And in path in many cases, the only change that was required was to replace '#include <ncwinpath.h>' with '#include <ncpathmgt.h>' and then replace file operation calls with the NCxxx equivalent from ncpathmgr.h Note that recently, the ncwinpath.h was renamed ncpathmgmt.h, so this pull request should not require dealing with winpath. The heart of the change is include/ncpathmgmt.h, which provides alternate operations such as NCfopen or NCaccess and which properly parse and rebuild path arguments to work for the platform on which the code is executing. This mostly matters for Windows because of the way that it uses backslash and drive letters, as compared to *nix*. One important feature is that the user can do string manipulations on a file path without having to worry too much about the platform because the path management code will properly handle most mixed cases. So one can for example concatenate a path suffix that uses forward slashes to a Windows path and have it work correctly. The conversion code is in libdispatch/dpathmgr.c, and the important function there is NCpathcvt which does the proper conversions to the local path format. As a rule, most code should just replace their file operations with the corresponding NCxxx ones defined in include/ncpathmgmt.h. These NCxxx functions all call NCpathcvt on their path arguments before executing the actual file operation. 
In some rare cases, the client may need to directly use NCpathcvt, but this should be avoided as much as possible. If there is a need for supporting a new file operation not already in ncpathmgmt.h, then use the code in dpathmgr.c as a template. Also please notify Unidata so we can include it as a formal part or our supported operations. Also, if you see an operation in the library that is not using the NCxxx form, then please submit an issue so we can fix it. Misc. Changes: * Clean up the utf8 testing code; it is impossible to get some tests to work under windows using shell scripts; the args do not pass as utf8 but as some other encoding. * Added an extra utf8 test case: test_unicode_path.sh * Add a true test for HDF5 1.10.6 or later because as noted in PR https://github.com/Unidata/netcdf-c/pull/1794, HDF5 changed its Windows file path handling.
2021-03-05 04:41:31 +08:00
cplen = strlen(cp);
base = (char *) emalloc((unsigned) (cplen+1));
base[0] = '\0';
strlcat(base,cp,cplen+1);
if ((sp = strrchr(base, '.')) != NULL)
2010-06-03 21:24:43 +08:00
*sp = '\0'; /* strip off any extension */
Codify cross-platform file paths The netcdf-c code has to deal with a variety of platforms: Windows, OSX, Linux, Cygwin, MSYS, etc. These platforms differ significantly in the kind of file paths that they accept. So in order to handle this, I have created a set of replacements for the most common file system operations such as _open_ or _fopen_ or _access_ to manage the file path differences correctly. A more limited version of this idea was already implemented via the ncwinpath.h and dwinpath.c code. So this can be viewed as a replacement for that code. And in path in many cases, the only change that was required was to replace '#include <ncwinpath.h>' with '#include <ncpathmgt.h>' and then replace file operation calls with the NCxxx equivalent from ncpathmgr.h Note that recently, the ncwinpath.h was renamed ncpathmgmt.h, so this pull request should not require dealing with winpath. The heart of the change is include/ncpathmgmt.h, which provides alternate operations such as NCfopen or NCaccess and which properly parse and rebuild path arguments to work for the platform on which the code is executing. This mostly matters for Windows because of the way that it uses backslash and drive letters, as compared to *nix*. One important feature is that the user can do string manipulations on a file path without having to worry too much about the platform because the path management code will properly handle most mixed cases. So one can for example concatenate a path suffix that uses forward slashes to a Windows path and have it work correctly. The conversion code is in libdispatch/dpathmgr.c, and the important function there is NCpathcvt which does the proper conversions to the local path format. As a rule, most code should just replace their file operations with the corresponding NCxxx ones defined in include/ncpathmgmt.h. These NCxxx functions all call NCpathcvt on their path arguments before executing the actual file operation. 
In some rare cases, the client may need to directly use NCpathcvt, but this should be avoided as much as possible. If there is a need for supporting a new file operation not already in ncpathmgmt.h, then use the code in dpathmgr.c as a template. Also please notify Unidata so we can include it as a formal part or our supported operations. Also, if you see an operation in the library that is not using the NCxxx form, then please submit an issue so we can fix it. Misc. Changes: * Clean up the utf8 testing code; it is impossible to get some tests to work under windows using shell scripts; the args do not pass as utf8 but as some other encoding. * Added an extra utf8 test case: test_unicode_path.sh * Add a true test for HDF5 1.10.6 or later because as noted in PR https://github.com/Unidata/netcdf-c/pull/1794, HDF5 changed its Windows file path handling.
2021-03-05 04:41:31 +08:00
done:
nullfree(cvtpath);
return base;
2010-06-03 21:24:43 +08:00
}
/*
 * Translate a netCDF type code into the keyword printed for it.
 *
 * Returns a pointer to a string literal.  For a code that is not in the
 * table below, error() is called with the offending value and, should
 * error() return, the placeholder "bogus" is handed back.
 */
static const char *
prim_type_name(nc_type type)
{
    static const struct {
        nc_type code;
        const char *label;
    } known[] = {
        { NC_BYTE,     "byte" },
        { NC_CHAR,     "char" },
        { NC_SHORT,    "short" },
        { NC_INT,      "int" },
        { NC_FLOAT,    "float" },
        { NC_DOUBLE,   "double" },
        { NC_UBYTE,    "ubyte" },
        { NC_USHORT,   "ushort" },
        { NC_UINT,     "uint" },
        { NC_INT64,    "int64" },
        { NC_UINT64,   "uint64" },
        { NC_STRING,   "string" },
        { NC_VLEN,     "vlen" },
        { NC_OPAQUE,   "opaque" },
        { NC_COMPOUND, "compound" }
    };
    size_t k;

    for (k = 0; k < sizeof(known) / sizeof(known[0]); k++) {
        if (known[k].code == type)
            return known[k].label;
    }

    error("prim_type_name: bad type %d", type);
    return "bogus";
}
/*
 * Remove trailing zeros (after decimal point) but not trailing decimal
 * point from ss, a string representation of a floating-point number that
 * might include an exponent part.  The string is edited in place.
 *
 * Examples: "1.500" -> "1.5", "1.000" -> "1.", "1.2300e+05" -> "1.23e+05".
 *
 * A string whose mantissa contains no decimal point (including the empty
 * string and non-numeric text) is left untouched: without a '.', trailing
 * zeros are significant integer digits, and there is nothing to stop the
 * backwards scan from walking off the front of the buffer.
 */
static void
tztrim(char *ss)
{
    char *cp = ss;
    char *ep;
    char *dot = NULL;   /* first '.' seen in the mantissa, if any */

    if (*cp == '-')
        cp++;
    /* Walk over the mantissa; the cast keeps isdigit() defined for
     * bytes with the high bit set when plain char is signed. */
    while (isdigit((unsigned char)*cp) || *cp == '.') {
        if (*cp == '.' && dot == NULL)
            dot = cp;
        cp++;
    }
    if (dot == NULL)
        return;         /* no fractional part, so nothing to trim */

    ep = cp;            /* start of exponent part, or terminating NUL */
    cp--;               /* last mantissa character */
    if (*cp == '.')
        return;         /* keep a trailing decimal point as is */
    while (*cp == '0')  /* bounded: a '.' lies at or before cp */
        cp--;
    cp++;               /* first trailing zero to discard */
    if (cp == ep)
        return;         /* there were no trailing zeros */
    /* Slide the exponent (if any) down over the discarded zeros. */
    while (*ep)
        *cp++ = *ep++;
    *cp = '\0';
}
/* Return file type string */
static const char *
kind_string(int kind)
{
switch (kind) {
case NC_FORMAT_CLASSIC:
return "classic";
2015-08-16 06:26:35 +08:00
case NC_FORMAT_64BIT_OFFSET:
2010-06-03 21:24:43 +08:00
return "64-bit offset";
2015-08-16 06:26:35 +08:00
case NC_FORMAT_CDF5:
return "cdf5";
2010-06-03 21:24:43 +08:00
case NC_FORMAT_NETCDF4:
return "netCDF-4";
case NC_FORMAT_NETCDF4_CLASSIC:
return "netCDF-4 classic model";
default:
error("unrecognized file format: %d", kind);
2010-06-03 21:24:43 +08:00
return "unrecognized";
}
}
/* Return extended format string */
static const char *
kind_string_extended(int kind, int mode)
{
static char text[1024];
switch (kind) {
2015-08-16 06:26:35 +08:00
case NC_FORMATX_NC3:
if(mode & NC_CDF5)
snprintf(text,sizeof(text),"%s mode=%08x", "64-bit data",mode);
else if(mode & NC_64BIT_OFFSET)
snprintf(text,sizeof(text),"%s mode=%08x", "64-bit offset",mode);
else
snprintf(text,sizeof(text),"%s mode=%08x", "classic",mode);
break;
2015-08-16 06:26:35 +08:00
case NC_FORMATX_NC_HDF5:
snprintf(text,sizeof(text),"%s mode=%08x", "HDF5",mode);
break;
2015-08-16 06:26:35 +08:00
case NC_FORMATX_NC_HDF4:
snprintf(text,sizeof(text),"%s mode=%08x", "HDF4",mode);
break;
2015-08-16 06:26:35 +08:00
case NC_FORMATX_PNETCDF:
snprintf(text,sizeof(text),"%s mode=%08x", "PNETCDF",mode);
break;
2015-08-16 06:26:35 +08:00
case NC_FORMATX_DAP2:
snprintf(text,sizeof(text),"%s mode=%08x", "DAP2",mode);
break;
2015-08-16 06:26:35 +08:00
case NC_FORMATX_DAP4:
snprintf(text,sizeof(text),"%s mode=%08x", "DAP4",mode);
break;
2015-08-16 06:26:35 +08:00
case NC_FORMATX_UNDEFINED:
snprintf(text,sizeof(text),"%s mode=%08x", "unknown",mode);
break;
default:
error("unrecognized extended format: %d",kind);
snprintf(text,sizeof(text),"%s mode=%08x", "unrecognized",mode);
break;
}
return text;
}
#if 0
/*
 * Read the entire contents of the file at `path` into a malloc'ed buffer.
 * (Currently compiled out.)
 *
 * path  - file to read
 * memp  - if non-NULL, receives the buffer on success; the caller then
 *         owns it and must free() it.  If NULL, the buffer is freed here.
 * sizep - if non-NULL, receives the file size in bytes on success.
 *
 * Returns NC_NOERR on success; otherwise an errno value from the failing
 * system call, NC_ENOMEM if the buffer could not be allocated, or
 * NC_ENOTNC if end-of-file was hit before `size` bytes were read.
 * On failure *memp and *sizep are left untouched.
 */
static int
fileopen(const char* path, void** memp, size_t* sizep)
{
    int status = NC_NOERR;
    int fd = -1;
    int oflags = 0;
    off_t size = 0;
    void* mem = NULL;
    off_t red = 0;      /* bytes still to be read */
    char* pos = NULL;   /* where the next read() lands */

    /* Open the file read-only */
    oflags = O_RDONLY;
#ifdef O_BINARY
    oflags |= O_BINARY;
#endif
    /* NOTE(review): POSIX leaves O_EXCL without O_CREAT undefined;
     * kept as in the original -- confirm whether it is still wanted. */
    oflags |= O_EXCL;
#ifdef vms
    fd = NCopen3(path, oflags, 0, "ctx=stm");
#else
    fd = NCopen2(path, oflags);
#endif
    if(fd < 0) {
        status = errno;
        goto done;
    }

    /* Find the file size by seeking to the end ... */
    size = lseek(fd,0,SEEK_END);
    if(size < 0) {status = errno; goto done;}
    /* ... then move the pointer back to the beginning of the file */
    if(lseek(fd,0,SEEK_SET) < 0) {status = errno; goto done;}

    /* Ask for at least one byte so that an empty file is not
     * mistaken for an allocation failure when malloc(0) returns NULL. */
    mem = malloc(size > 0 ? (size_t)size : 1);
    if(mem == NULL) {status = NC_ENOMEM; goto done;}

    /* Read the file into memory */
    /* We need to do multiple reads because there is no
       guarantee that the amount read will be the full amount */
    red = size;
    pos = (char*)mem;
    while(red > 0) {
        ssize_t count = read(fd, pos, (size_t)red);
        if(count < 0) {status = errno; goto done;}
        if(count == 0) {status = NC_ENOTNC; goto done;}
        /* assert(count > 0) */
        red -= count;
        pos += count;
    }

done:
    if(fd >= 0)
        (void)close(fd);
    if(status != NC_NOERR) {
#ifndef DEBUG
        fprintf(stderr,"open failed: file=%s err=%d\n",path,status);
        fflush(stderr);
#endif
        /* Failure: release any partial buffer, report nothing back */
        free(mem);
        return status;
    }
    if(sizep) *sizep = (size_t)size;
    if(memp)
        *memp = mem;    /* ownership passes to the caller */
    else
        free(mem);
    return status;
}
#endif
/*
2010-06-03 21:24:43 +08:00
* Emit initial line of output for NcML
*/
static void
2010-06-03 21:24:43 +08:00
pr_initx(int ncid, const char *path)
{
2021-11-12 01:47:49 +08:00
printf("<?xml version=\"%s\" encoding=\"UTF-8\"?>\n<netcdf xmlns=\"https://www.unidata.ucar.edu/namespaces/netcdf/ncml-2.2\" location=\"%s\">\n",
XML_VERSION, path);
2010-06-03 21:24:43 +08:00
}
/*
* Print attribute string, for text attributes.
*/
static void
pr_att_string(
int kind,
size_t len,
const char *string
)
{
int iel;
const char *cp;
const char *sp;
unsigned char uc;
cp = string;
printf ("\"");
/* adjust len so trailing nulls don't get printed */
sp = cp + len - 1;
while (len != 0 && *sp-- == '\0')
len--;
for (iel = 0; iel < len; iel++)
switch (uc = *cp++ & 0377) {
case '\b':
printf ("\\b");
break;
case '\f':
printf ("\\f");
break;
case '\n':
2010-06-03 21:24:43 +08:00
/* Only generate linebreaks after embedded newlines for
2015-08-16 06:26:35 +08:00
* classic, 64-bit offset, cdf5, or classic model files. For
2010-06-03 21:24:43 +08:00
* netCDF-4 files, don't generate linebreaks, because that
* would create an extra string in a list of strings. */
if (kind != NC_FORMAT_NETCDF4) {
printf ("\\n\",\n\t\t\t\"");
} else {
printf("\\n");
}
break;
case '\r':
printf ("\\r");
break;
case '\t':
printf ("\\t");
break;
case '\v':
printf ("\\v");
break;
case '\\':
printf ("\\\\");
break;
case '\'':
printf ("\\\'");
2010-06-03 21:24:43 +08:00
break;
case '\"':
printf ("\\\"");
break;
default:
if (iscntrl(uc))
printf ("\\%03o",uc);
else
printf ("%c",uc);
break;
}
printf ("\"");
}
/*
 * Print NcML attribute string, for text attributes.
 *
 * Writes `string` to stdout as a double-quoted XML attribute value.
 * Trailing NUL bytes within the first `len` bytes are dropped.  The
 * characters ", <, > and & are written as named entities; newline,
 * carriage return and tab as hexadecimal character references; other
 * control characters as decimal character references.
 *
 * An embedded NUL is written as "&#0;".  The first one seen in a value
 * also triggers a warning on stderr naming the attribute, because the
 * resulting XML is not legal.
 *
 * attname - attribute name, used only in that warning
 * len     - number of bytes of `string` to consider
 * string  - the text; need not be NUL-terminated within `len` bytes
 */
static void
pr_attx_string(
    const char* attname,
    size_t len,
    const char *string
    )
{
    size_t iel;         /* size_t so the index can reach every len */
    const char *cp;
    unsigned char uc;
    int nulcount = 0;   /* embedded NULs seen so far */

    cp = string;
    printf ("\"");
    /* adjust len so trailing nulls don't get printed; indexing by
     * len - 1 never forms a pointer in front of the buffer */
    while (len != 0 && string[len - 1] == '\0')
        len--;
    for (iel = 0; iel < len; iel++) {
        uc = (unsigned char)*cp++;
        switch (uc) {
        case '\"':
            printf ("&quot;");
            break;
        case '<':
            printf ("&lt;");
            break;
        case '>':
            printf ("&gt;");
            break;
        case '&':
            printf ("&amp;");
            break;
        case '\n':
            printf ("&#xA;");
            break;
        case '\r':
            printf ("&#xD;");
            break;
        case '\t':
            printf ("&#x9;");
            break;
        case '\0':
            printf ("&#0;");
            if(nulcount++ == 0)
                fprintf(stderr,"Attribute: '%s'; value contains nul characters; producing illegal xml\n",attname);
            break;
        default:
            if (iscntrl(uc))
                printf ("&#%d;",uc);
            else
                printf ("%c",uc);
            break;
        }
    }
    printf ("\"");
}
/*
 * Print list of attribute values, for attributes of primitive types.
 * Attribute values must be printed with explicit type tags for
 * netCDF-3 primitive types, because CDL doesn't require explicit
 * syntax to declare such attribute types.
 *
 * kind - file format, passed through to pr_att_string()
 * type - type of the values
 * len  - number of values
 * vals - array of `len` values of `type`
 *
 * Values are written to stdout separated by ", ".  An NC_CHAR attribute
 * is printed as a single string.  Non-finite floating-point values are
 * printed as NaN / Infinity (with an 'f' suffix for NC_FLOAT).  With
 * USE_NETCDF4, a NULL NC_STRING element is printed as NIL.
 */
static void
pr_att_valgs(
    int kind,
    nc_type type,
    size_t len,
    const void *vals
    )
{
    size_t iel;         /* size_t so the index can reach every len */
    signed char sc;
    short ss;
    int ii;
    char gps[PRIM_LEN];
    float ff;
    double dd;
    unsigned char uc;
    unsigned short us;
    unsigned int ui;
    int64_t i64;
    uint64_t ui64;
#ifdef USE_NETCDF4
    char *stringp;
#endif /* USE_NETCDF4 */
    const char *delim = ", ";   /* delimiter between output values */

    if (type == NC_CHAR) {
        pr_att_string(kind, len, (const char *) vals);
        return;
    }
    /* else */
    for (iel = 0; iel < len; iel++) {
        if (iel == len - 1)
            delim = "";         /* nothing after the last value */
        switch (type) {
        case NC_BYTE:
            sc = ((const signed char *) vals)[iel];
            printf ("%db%s", sc, delim);
            break;
        case NC_SHORT:
            ss = ((const short *) vals)[iel];
            printf ("%ds%s", ss, delim);
            break;
        case NC_INT:
            ii = ((const int *) vals)[iel];
            printf ("%d%s", ii, delim);
            break;
        case NC_FLOAT:
            ff = ((const float *) vals)[iel];
            if(isfinite(ff)) {
                int res;
                res = snprintf(gps, PRIM_LEN, float_att_fmt, ff);
                assert(res < PRIM_LEN);
                tztrim(gps);    /* trim trailing 0's after '.' */
                printf ("%s%s", gps, delim);
            } else {
                if(isnan(ff)) {
                    printf("NaNf%s", delim);
                } else if(isinf(ff)) {
                    if(ff < 0.0f) {
                        printf("-");
                    }
                    printf("Infinityf%s", delim);
                }
            }
            break;
        case NC_DOUBLE:
            dd = ((const double *) vals)[iel];
            if(isfinite(dd)) {
                int res;
                res = snprintf(gps, PRIM_LEN, double_att_fmt, dd);
                assert(res < PRIM_LEN);
                tztrim(gps);
                printf ("%s%s", gps, delim);
            } else {
                if(isnan(dd)) {
                    printf("NaN%s", delim);
                } else if(isinf(dd)) {
                    if(dd < 0.0) {
                        printf("-");
                    }
                    printf("Infinity%s", delim);
                }
            }
            break;
        case NC_UBYTE:
            uc = ((const unsigned char *) vals)[iel];
            /* cast: the promoted value is an int, %u wants unsigned */
            printf ("%uUB%s", (unsigned int)uc, delim);
            break;
        case NC_USHORT:
            us = ((const unsigned short *) vals)[iel];
            printf ("%huUS%s", us, delim);
            break;
        case NC_UINT:
            ui = ((const unsigned int *) vals)[iel];
            printf ("%uU%s", ui, delim);
            break;
        case NC_INT64:
            i64 = ((const int64_t *) vals)[iel];
            /* cast: int64_t is not long long on every platform */
            printf ("%lldLL%s", (long long)i64, delim);
            break;
        case NC_UINT64:
            ui64 = ((const uint64_t *) vals)[iel];
            printf ("%lluULL%s", (unsigned long long)ui64, delim);
            break;
#ifdef USE_NETCDF4
        case NC_STRING:
            stringp = ((char *const *) vals)[iel];
            if(stringp)
                pr_att_string(kind, strlen(stringp), stringp);
            else
                printf("NIL");
            printf("%s", delim);
            break;
#endif /* USE_NETCDF4 */
        default:
            error("pr_att_vals: bad type");
        }
    }
}
/*
 * Print list of numeric attribute values to string for use in NcML output.
 * Unlike CDL, NcML makes type explicit, so don't need type suffixes.
 *
 * type       - type the values are to be printed as
 * len        - number of values
 * vals       - the values, supplied as doubles whatever `type` is
 * attvals    - returned string: the values separated by single blanks
 * attvalslen - size of the attvals buffer, assumed large enough to hold
 *              all len blank-separated values
 *
 * Each value is converted from double to the C type matching `type`
 * before being formatted; floating-point text has trailing zeros
 * trimmed.  A type not handled below is reported through error() and
 * adds nothing to the output.
 */
static void
pr_att_valsx(
    nc_type type,
    size_t len,
    const double *vals,
    char *attvals,		/* returned string */
    size_t attvalslen		/* size of attvals buffer, assumed
				   large enough to hold all len
				   blank-separated values */
    )
{
    size_t iel;         /* size_t so the index can reach every len */

    attvals[0]='\0';
    if (len == 0)
        return;
    for (iel = 0; iel < len; iel++) {
        char gps[PRIM_LEN];
        int res;
        int is_real = 0;        /* set for types that need tztrim() */

        switch (type) {
        case NC_BYTE:
        case NC_SHORT:
        case NC_INT:
            res = snprintf(gps, PRIM_LEN, "%d", (int)vals[iel]);
            break;
        case NC_UBYTE:
        case NC_USHORT:
        case NC_UINT:
            res = snprintf(gps, PRIM_LEN, "%u", (unsigned int)vals[iel]);
            break;
        case NC_INT64:
            /* go through int64_t first, then widen for the format */
            res = snprintf(gps, PRIM_LEN, "%lld",
                           (long long)(int64_t)vals[iel]);
            break;
        case NC_UINT64:
            res = snprintf(gps, PRIM_LEN, "%llu",
                           (unsigned long long)(uint64_t)vals[iel]);
            break;
        case NC_FLOAT: {
            /* narrow to float so the text shows float precision */
            float ff = (float)vals[iel];
            res = snprintf(gps, PRIM_LEN, float_attx_fmt, ff);
            is_real = 1;
        } break;
        case NC_DOUBLE:
            /* NOTE(review): this uses double_att_fmt while NC_FLOAT uses
             * float_attx_fmt -- confirm the asymmetry is intended. */
            res = snprintf(gps, PRIM_LEN, double_att_fmt, vals[iel]);
            is_real = 1;
            break;
        default:
            error("pr_att_valsx: bad type");
            continue;           /* nothing to append for this value */
        }
        assert(res < PRIM_LEN);
        if (is_real)
            tztrim(gps);        /* trim trailing 0's after '.' */
        (void) strlcat(attvals, gps, attvalslen);
        (void) strlcat(attvals, iel + 1 < len ? " " : "", attvalslen);
    }
}
/*
2010-06-03 21:24:43 +08:00
* Print a variable attribute
*/
static void
pr_att(
int ncid,
int kind,
int varid,
const char *varname,
int ia
)
{
ncatt_t att; /* attribute */
2010-06-03 21:24:43 +08:00
NC_CHECK( nc_inq_attname(ncid, varid, ia, att.name) );
#ifdef USE_NETCDF4
if (ncid == getrootid(ncid)
&& varid == NC_GLOBAL
&& strcmp(att.name,NCPROPS)==0)
2019-09-18 10:27:43 +08:00
return; /* will be printed elsewhere */
#endif
2010-06-03 21:24:43 +08:00
NC_CHECK( nc_inq_att(ncid, varid, att.name, &att.type, &att.len) );
att.tinfo = get_typeinfo(att.type);
indent_out();
printf ("\t\t");
#ifdef USE_NETCDF4
if (is_user_defined_type(att.type) || att.type == NC_STRING)
#else
if (is_user_defined_type(att.type))
#endif
{
/* TODO: omit next two lines if att_type_name not needed
* because print_type_name() looks it up */
char att_type_name[NC_MAX_NAME + 1];
get_type_name(ncid, att.type, att_type_name);
/* printf ("\t\t%s ", att_type_name); */
/* ... but handle special characters in CDL names with escapes */
print_type_name(ncid, att.type);
printf(" ");
}
/* printf ("\t\t%s:%s = ", varname, att.name); */
print_name(varname);
if(iskeyword(varname)) /* see discussion about escapes in ncgen man page*/
printf(" ");
2010-06-03 21:24:43 +08:00
printf(":");
print_name(att.name);
printf(" = ");
if (att.len == 0) { /* show 0-length attributes as empty strings */
att.type = NC_CHAR;
}
if (! is_user_defined_type(att.type) ) {
att.valgp = (void *) emalloc((att.len + 1) * att.tinfo->size );
NC_CHECK( nc_get_att(ncid, varid, att.name, att.valgp ) );
if(att.type == NC_CHAR) /* null-terminate retrieved text att value */
((char *)att.valgp)[att.len] = '\0';
/* (1) Print normal list of attribute values. */
pr_att_valgs(kind, att.type, att.len, att.valgp);
printf (" ;"); /* terminator for normal list */
/* (2) If -t option, add list of date/time strings as CDL comments. */
if(formatting_specs.string_times) {
/* Prints text after semicolon and before final newline.
* Prints nothing if not qualified for time interpretation.
* Will include line breaks for longer lists. */
print_att_times(ncid, varid, &att);
if(is_bounds_att(&att)) {
insert_bounds_info(ncid, varid, &att);
}
}
2010-06-03 21:24:43 +08:00
#ifdef USE_NETCDF4
/* If NC_STRING, need to free all the strings also */
if(att.type == NC_STRING) {
nc_free_string(att.len, att.valgp);
}
#endif /* USE_NETCDF4 */
free(att.valgp);
}
#ifdef USE_NETCDF4
else /* User-defined type. */
{
char type_name[NC_MAX_NAME + 1];
size_t type_size, nfields;
nc_type base_nc_type;
int class, i;
void *data = NULL;
2010-06-03 21:24:43 +08:00
NC_CHECK( nc_inq_user_type(ncid, att.type, type_name, &type_size,
2010-06-03 21:24:43 +08:00
&base_nc_type, &nfields, &class));
switch(class)
{
case NC_VLEN:
/* because size returned for vlen is base type size, but we
* need space to read array of vlen structs into ... */
data = emalloc((att.len + 1) * sizeof(nc_vlen_t));
2010-06-03 21:24:43 +08:00
break;
case NC_OPAQUE:
data = emalloc((att.len + 1) * type_size);
2010-06-03 21:24:43 +08:00
break;
case NC_ENUM:
/* a long long is ample for all base types */
data = emalloc((att.len + 1) * sizeof(int64_t));
2010-06-03 21:24:43 +08:00
break;
case NC_COMPOUND:
data = emalloc((att.len + 1) * type_size);
2010-06-03 21:24:43 +08:00
break;
default:
error("unrecognized class of user defined type: %d", class);
}
NC_CHECK( nc_get_att(ncid, varid, att.name, data));
switch(class) {
case NC_VLEN:
pr_any_att_vals(&att, data);
break;
case NC_OPAQUE: {
char *sout = emalloc(2 * type_size + strlen("0X") + 1);
unsigned char *cp = data;
for (i = 0; i < att.len; i++) {
(void) ncopaque_val_as_hex(type_size, sout, cp);
printf("%s%s", sout, i < att.len-1 ? ", " : "");
cp += type_size;
}
free(sout);
} break;
2010-06-03 21:24:43 +08:00
case NC_ENUM: {
int64_t value;
for (i = 0; i < att.len; i++) {
char enum_name[NC_MAX_NAME + 1];
switch(base_nc_type)
{
case NC_BYTE:
value = *((char *)data + i);
break;
case NC_UBYTE:
value = *((unsigned char *)data + i);
break;
case NC_SHORT:
value = *((short *)data + i);
break;
case NC_USHORT:
value = *((unsigned short *)data + i);
break;
case NC_INT:
value = *((int *)data + i);
break;
case NC_UINT:
value = *((unsigned int *)data + i);
break;
case NC_INT64:
value = *((int64_t *)data + i);
break;
case NC_UINT64:
value = *((uint64_t *)data + i);
break;
default:
error("enum must have an integer base type: %d", base_nc_type);
2010-06-03 21:24:43 +08:00
}
NC_CHECK( nc_inq_enum_ident(ncid, att.type, value,
2010-06-03 21:24:43 +08:00
enum_name));
/* printf("%s%s", enum_name, i < att.len-1 ? ", " : ""); */
print_name(enum_name);
printf("%s", i < att.len-1 ? ", " : "");
}
} break;
2010-06-03 21:24:43 +08:00
case NC_COMPOUND:
pr_any_att_vals(&att, data);
break;
default:
error("unrecognized class of user defined type: %d", class);
}
Fix various problem around VLEN's re: https://github.com/Unidata/netcdf-c/issues/541 re: https://github.com/Unidata/netcdf-c/issues/1208 re: https://github.com/Unidata/netcdf-c/issues/2078 re: https://github.com/Unidata/netcdf-c/issues/2041 re: https://github.com/Unidata/netcdf-c/issues/2143 For a long time, there have been known problems with the management of complex types containing VLENs. This also involves the string type because it is stored as a VLEN of chars. This PR (mostly) fixes this problem. But note that it adds new functions to netcdf.h (see below) and this may require bumping the .so number. These new functions can be removed, if desired, in favor of functions in netcdf_aux.h, but netcdf.h seems the better place for them because they are intended as alternatives to the nc_free_vlen and nc_free_string functions already in netcdf.h. The term complex type refers to any type that directly or transitively references a VLEN type. So an array of VLENS, a compound with a VLEN field, and so on. In order to properly handle instances of these complex types, it is necessary to have function that can recursively walk instances of such types to perform various actions on them. The term "deep" is also used to mean recursive. At the moment, the two operations needed by the netcdf library are: * free'ing an instance of the complex type * copying an instance of the complex type. The current library does only shallow free and shallow copy of complex types. This means that only the top level is properly free'd or copied, but deep internal blocks in the instance are not touched. Note that the term "vector" will be used to mean a contiguous (in memory) sequence of instances of some type. Given an array with, say, dimensions 2 X 3 X 4, this will be stored in memory as a vector of length 2*3*4=24 instances. The use cases are primarily these. ## nc_get_vars Suppose one is reading a vector of instances using nc_get_vars (or nc_get_vara or nc_get_var, etc.). 
These functions will return the vector in the top-level memory provided. All interior blocks (form nested VLEN or strings) will have been dynamically allocated. After using this vector of instances, it is necessary to free (aka reclaim) the dynamically allocated memory, otherwise a memory leak occurs. So, the recursive reclaim function is used to walk the returned instance vector and do a deep reclaim of the data. Currently functions are defined in netcdf.h that are supposed to handle this: nc_free_vlen(), nc_free_vlens(), and nc_free_string(). Unfortunately, these functions only do a shallow free, so deeply nested instances are not properly handled by them. Note that internally, the provided data is immediately written so there is no need to copy it. But the caller may need to reclaim the data it passed into the function. ## nc_put_att Suppose one is writing a vector of instances as the data of an attribute using, say, nc_put_att. Internally, the incoming attribute data must be copied and stored so that changes/reclamation of the input data will not affect the attribute. Again, the code inside the netcdf library does only shallow copying rather than deep copy. As a result, one sees effects such as described in Github Issue https://github.com/Unidata/netcdf-c/issues/2143. Also, after defining the attribute, it may be necessary for the user to free the data that was provided as input to nc_put_att(). ## nc_get_att Suppose one is reading a vector of instances as the data of an attribute using, say, nc_get_att. Internally, the existing attribute data must be copied and returned to the caller, and the caller is responsible for reclaiming the returned data. Again, the code inside the netcdf library does only shallow copying rather than deep copy. So this can lead to memory leaks and errors because the deep data is shared between the library and the user. # Solution The solution is to build properly recursive reclaim and copy functions and use those as needed. 
These recursive functions are defined in libdispatch/dinstance.c and their signatures are defined in include/netcdf.h. For back compatibility, corresponding "ncaux_XXX" functions are defined in include/netcdf_aux.h. ```` int nc_reclaim_data(int ncid, nc_type xtypeid, void* memory, size_t count); int nc_reclaim_data_all(int ncid, nc_type xtypeid, void* memory, size_t count); int nc_copy_data(int ncid, nc_type xtypeid, const void* memory, size_t count, void* copy); int nc_copy_data_all(int ncid, nc_type xtypeid, const void* memory, size_t count, void** copyp); ```` There are two variants. The first two, nc_reclaim_data() and nc_copy_data(), assume the top-level vector is managed by the caller. For reclaim, this is so the user can use, for example, a statically allocated vector. For copy, it assumes the user provides the space into which the copy is stored. The second two, nc_reclaim_data_all() and nc_copy_data_all(), allows the functions to manage the top-level. So for nc_reclaim_data_all, the top level is assumed to be dynamically allocated and will be free'd by nc_reclaim_data_all(). The nc_copy_data_all() function will allocate the top level and return a pointer to it to the user. The user can later pass that pointer to nc_reclaim_data_all() to reclaim the instance(s). # Internal Changes The netcdf-c library internals are changed to use the proper reclaim and copy functions. It turns out that the places where these functions are needed is quite pervasive in the netcdf-c library code. Using these functions also allows some simplification of the code since the stdata and vldata fields of NC_ATT_INFO are no longer needed. Currently this is commented out using the SEPDATA \#define macro. When any bugs are largely fixed, all this code will be removed. # Known Bugs 1. There is still one known failure that has not been solved. All the failures revolve around some variant of this .cdl file. The proximate cause of failure is the use of a VLEN FillValue. 
```` netcdf x { types: float(*) row_of_floats ; dimensions: m = 5 ; variables: row_of_floats ragged_array(m) ; row_of_floats ragged_array:_FillValue = {-999} ; data: ragged_array = {10, 11, 12, 13, 14}, {20, 21, 22, 23}, {30, 31, 32}, {40, 41}, _ ; } ```` When a solution is found, I will either add it to this PR or post a new PR. # Related Changes * Mark nc_free_vlen(s) as deprecated in favor of ncaux_reclaim_data. * Remove the --enable-unfixed-memory-leaks option. * Remove the NC_VLENS_NOTEST code that suppresses some vlen tests. * Document this change in docs/internal.md * Disable the tst_vlen_data test in ncdump/tst_nccopy4.sh. * Mark types as fixed size or not (transitively) to optimize the reclaim and copy functions. # Misc. Changes * Make Doxygen process libdispatch/daux.c * Make sure the NC_ATT_INFO_T.container field is set.
2022-01-09 09:30:00 +08:00
NC_CHECK(nc_reclaim_data_all(ncid,att.type,data,att.len));
printf (" ;"); /* terminator for user defined types */
2010-06-03 21:24:43 +08:00
}
#endif /* USE_NETCDF4 */
printf ("\n"); /* final newline for all attribute types */
2010-06-03 21:24:43 +08:00
}
/*
 * Shared helper that writes the "varname:attname" prefix of an attribute
 * line to stdout, after the current indentation and two tabs.
 * ncid is accepted but not used.
 */
static void
pr_att_name(
    int ncid,
    const char *varname,
    const char *attname
    )
{
    (void)ncid;

    indent_out();
    fputs("\t\t", stdout);
    print_name(varname);
    putchar(':');
    print_name(attname);
}
/*
 * Write the special global attribute named by NC_ATT_FORMAT, whose value
 * is the format name that kind_string() gives for `kind`.  This is a
 * virtual attribute: it is generated here rather than read from the file.
 */
static void
pr_att_global_format(
    int ncid,
    int kind
    )
{
    const char *global_var = "";   /* empty variable name */

    pr_att_name(ncid, global_var, NC_ATT_FORMAT);
    fputs(" = ", stdout);
    /* kind_string() is evaluated after " = " has been written */
    printf("\"%s\" ;\n", kind_string(kind));
}
#ifdef USE_NETCDF4
/*
2010-06-03 21:24:43 +08:00
* Print special reserved variable attributes, such as _Chunking,
* _DeflateLevel, ... These are virtual, not real, attributes
* generated from the result of inquire calls. They are of primitive
* type to fit into the classic model. Currently, these only exist
* for netCDF-4 data.
*/
static void
pr_att_specials(
int ncid,
int kind,
int varid,
const ncvar_t *varp
)
{
int contig = NC_CHUNKED;
2010-06-03 21:24:43 +08:00
/* No special variable attributes for classic or 64-bit offset data */
if(kind == 1 || kind == 2)
return;
/* _Chunking tests */
NC_CHECK( nc_inq_var_chunking(ncid, varid, &contig, NULL ) );
if(contig == NC_CONTIGUOUS) {
pr_att_name(ncid, varp->name, NC_ATT_STORAGE);
2010-06-03 21:24:43 +08:00
printf(" = \"contiguous\" ;\n");
} else if(contig == NC_COMPACT) {
pr_att_name(ncid, varp->name, NC_ATT_STORAGE);
printf(" = \"compact\" ;\n");
} else if(contig == NC_CHUNKED) {
2010-06-03 21:24:43 +08:00
size_t *chunkp;
int i;
pr_att_name(ncid, varp->name, NC_ATT_STORAGE);
printf(" = \"chunked\" ;\n");
chunkp = (size_t *) emalloc(sizeof(size_t) * (varp->ndims + 1) );
2010-06-03 21:24:43 +08:00
NC_CHECK( nc_inq_var_chunking(ncid, varid, NULL, chunkp) );
/* print chunking, even if it is default */
pr_att_name(ncid, varp->name, NC_ATT_CHUNKING);
printf(" = ");
for(i = 0; i < varp->ndims; i++) {
printf("%lu%s", (unsigned long)chunkp[i], i+1 < varp->ndims ? ", " : " ;\n");
}
free(chunkp);
2020-09-03 23:51:46 +08:00
} else if(contig == NC_VIRTUAL) {
pr_att_name(ncid, varp->name, NC_ATT_STORAGE);
printf(" = \"virtual\" ;\n");
} else {
pr_att_name(ncid, varp->name, NC_ATT_STORAGE);
printf(" = \"unknown\" ;\n");
2010-06-03 21:24:43 +08:00
}
Add support for multiple filters per variable. re: https://github.com/Unidata/netcdf-c/issues/1584 Support has been added for multiple filters per variable. This affects a number of components in netcdf. The new APIs are documented in NUG/filters.md. The primary changes are: * A set of new functions are provided (see __include/netcdf_filter.h__). - Obtain a list of the filters associated with a variable - Obtain the parameters for a specific filter. * The existing __nc_inq_var_filter__ function now returns info about the first defined filter. * The utilities (ncgen, ncdump, and nccopy) now support an extended format for specifying a sequence of filters. The general form is __<filter>|<filter>..._. * The ncdump **_Filter** attribute now dumps a list of all the filters associated with a variable using the above new format. * Filter specifications can now use a filter name instead of number for filters known to the netcdf library, which in turn is taken from the HDF5 filter registration page. * New errors are defined: NC_EFILTER and NC_ENOFILTER. The latter is returned if an attempt is made to access an unknown filter. * Internally, the dispatch table has been extended to add a function to handle all of the filter functions. * New, filter-related, tests were added to nc_test4. * A new plugin was added to the plugins directory to help with testing. Notes: 1. The shuffle and fletcher32 filters are not part of the multifilter system. Misc. changes: 1. A debug module was added to libhdf5 to help catch error locations.
2020-02-17 03:59:33 +08:00
/* _Filter (including deflate and shuffle) */
2010-06-03 21:24:43 +08:00
{
Add support for multiple filters per variable. re: https://github.com/Unidata/netcdf-c/issues/1584 Support has been added for multiple filters per variable. This affects a number of components in netcdf. The new APIs are documented in NUG/filters.md. The primary changes are: * A set of new functions are provided (see __include/netcdf_filter.h__). - Obtain a list of the filters associated with a variable - Obtain the parameters for a specific filter. * The existing __nc_inq_var_filter__ function now returns info about the first defined filter. * The utilities (ncgen, ncdump, and nccopy) now support an extended format for specifying a sequence of filters. The general form is __<filter>|<filter>..._. * The ncdump **_Filter** attribute now dumps a list of all the filters associated with a variable using the above new format. * Filter specifications can now use a filter name instead of number for filters known to the netcdf library, which in turn is taken from the HDF5 filter registration page. * New errors are defined: NC_EFILTER and NC_ENOFILTER. The latter is returned if an attempt is made to access an unknown filter. * Internally, the dispatch table has been extended to add a function to handle all of the filter functions. * New, filter-related, tests were added to nc_test4. * A new plugin was added to the plugins directory to help with testing. Notes: 1. The shuffle and fletcher32 filters are not part of the multifilter system. Misc. changes: 1. A debug module was added to libhdf5 to help catch error locations.
2020-02-17 03:59:33 +08:00
size_t nparams, nfilters, nbytes;
int shuffle=NC_NOSHUFFLE;
unsigned int* filterids = NULL;
unsigned int* params = NULL;
int usedeflateatt = 0;
/* Get applicable filter ids */
Mostly revert the filter code to reduce its complexity of use. re: https://github.com/Unidata/netcdf-c/issues/1836 Revert the internal filter code to simplify it. From the user's point of view, the only visible changes should be: 1. The functions that convert text to filter specs have had their signature reverted and have been moved to netcdf_aux.h 2. Some filter API functions now return NC_ENOFILTER when inquiry is made about some filter. Internally,the dispatch table has been modified to get rid of the filter_actions entry and associated complex structures. It has been replaced with inq_var_filter_ids and inq_var_filter_info entries and the dispatch table version has been bumped to 3. Corresponding NOOP and NOTNC4 functions were added to libdispatch/dnotnc4.c. Also, the filter_action entries in dispatch tables were replaced for all dispatch code bases (HDF5, DAP2, etc). This should only impact UDF users. In the process, it became clear that the form of the filters field in NC_VAR_INFO_T was format dependent, so I converted it to be of type void* and pushed its management into the various dispatch code bases. Specifically libhdf5 and libnczarr now manage the filters field in their own way. The auxilliary functions for parsing textual filter specifications were moved to netcdf_aux.h and were renamed to the following: * ncaux_h5filterspec_parse * ncaux_h5filterspec_parselist * ncaux_h5filterspec_free * ncaux_h5filter_fix8 Misc. Other Changes: 1. Document NUG/filters.md updated to reflect the changes above. 2. All the old data types (structs and enums) used by filter_actions actions were deleted. The exception is the NC_H5_Filterspec because it is needed by ncaux_h5filterspec_parselist. 3. Clientside filters were removed -- another enhancement for which no-one ever asked. 4. The ability to remove filters was itself removed. 5. Some functionality needed by nczarr was moved from libhdf5 to libsrc4 e.g. nc4_find_default_chunksizes 6. All the filterx code was removed 7. 
ncfilter.h and nc4filter.c no longer used Misc. Unrelated Changes: 1. The nczarr_test makefile clean was leaving some directories; so add clean-local to take care of them.
2020-09-28 02:43:46 +08:00
NC_CHECK(nc_inq_var_filter_ids(ncid, varid, &nfilters, NULL));
Add support for multiple filters per variable. re: https://github.com/Unidata/netcdf-c/issues/1584 Support has been added for multiple filters per variable. This affects a number of components in netcdf. The new APIs are documented in NUG/filters.md. The primary changes are: * A set of new functions are provided (see __include/netcdf_filter.h__). - Obtain a list of the filters associated with a variable - Obtain the parameters for a specific filter. * The existing __nc_inq_var_filter__ function now returns info about the first defined filter. * The utilities (ncgen, ncdump, and nccopy) now support an extended format for specifying a sequence of filters. The general form is __<filter>|<filter>..._. * The ncdump **_Filter** attribute now dumps a list of all the filters associated with a variable using the above new format. * Filter specifications can now use a filter name instead of number for filters known to the netcdf library, which in turn is taken from the HDF5 filter registration page. * New errors are defined: NC_EFILTER and NC_ENOFILTER. The latter is returned if an attempt is made to access an unknown filter. * Internally, the dispatch table has been extended to add a function to handle all of the filter functions. * New, filter-related, tests were added to nc_test4. * A new plugin was added to the plugins directory to help with testing. Notes: 1. The shuffle and fletcher32 filters are not part of the multifilter system. Misc. changes: 1. A debug module was added to libhdf5 to help catch error locations.
2020-02-17 03:59:33 +08:00
/* Get set of filters for this variable */
if(nfilters > 0) {
filterids = (unsigned int*)malloc(sizeof(unsigned int)*nfilters);
if(filterids == NULL) NC_CHECK(NC_ENOMEM);
} else
filterids = NULL;
Mostly revert the filter code to reduce its complexity of use. re: https://github.com/Unidata/netcdf-c/issues/1836 Revert the internal filter code to simplify it. From the user's point of view, the only visible changes should be: 1. The functions that convert text to filter specs have had their signature reverted and have been moved to netcdf_aux.h 2. Some filter API functions now return NC_ENOFILTER when inquiry is made about some filter. Internally,the dispatch table has been modified to get rid of the filter_actions entry and associated complex structures. It has been replaced with inq_var_filter_ids and inq_var_filter_info entries and the dispatch table version has been bumped to 3. Corresponding NOOP and NOTNC4 functions were added to libdispatch/dnotnc4.c. Also, the filter_action entries in dispatch tables were replaced for all dispatch code bases (HDF5, DAP2, etc). This should only impact UDF users. In the process, it became clear that the form of the filters field in NC_VAR_INFO_T was format dependent, so I converted it to be of type void* and pushed its management into the various dispatch code bases. Specifically libhdf5 and libnczarr now manage the filters field in their own way. The auxilliary functions for parsing textual filter specifications were moved to netcdf_aux.h and were renamed to the following: * ncaux_h5filterspec_parse * ncaux_h5filterspec_parselist * ncaux_h5filterspec_free * ncaux_h5filter_fix8 Misc. Other Changes: 1. Document NUG/filters.md updated to reflect the changes above. 2. All the old data types (structs and enums) used by filter_actions actions were deleted. The exception is the NC_H5_Filterspec because it is needed by ncaux_h5filterspec_parselist. 3. Clientside filters were removed -- another enhancement for which no-one ever asked. 4. The ability to remove filters was itself removed. 5. Some functionality needed by nczarr was moved from libhdf5 to libsrc4 e.g. nc4_find_default_chunksizes 6. All the filterx code was removed 7. 
ncfilter.h and nc4filter.c no longer used Misc. Unrelated Changes: 1. The nczarr_test makefile clean was leaving some directories; so add clean-local to take care of them.
2020-09-28 02:43:46 +08:00
NC_CHECK(nc_inq_var_filter_ids(ncid, varid, &nfilters, filterids));
Add support for multiple filters per variable. re: https://github.com/Unidata/netcdf-c/issues/1584 Support has been added for multiple filters per variable. This affects a number of components in netcdf. The new APIs are documented in NUG/filters.md. The primary changes are: * A set of new functions are provided (see __include/netcdf_filter.h__). - Obtain a list of the filters associated with a variable - Obtain the parameters for a specific filter. * The existing __nc_inq_var_filter__ function now returns info about the first defined filter. * The utilities (ncgen, ncdump, and nccopy) now support an extended format for specifying a sequence of filters. The general form is __<filter>|<filter>..._. * The ncdump **_Filter** attribute now dumps a list of all the filters associated with a variable using the above new format. * Filter specifications can now use a filter name instead of number for filters known to the netcdf library, which in turn is taken from the HDF5 filter registration page. * New errors are defined: NC_EFILTER and NC_ENOFILTER. The latter is returned if an attempt is made to access an unknown filter. * Internally, the dispatch table has been extended to add a function to handle all of the filter functions. * New, filter-related, tests were added to nc_test4. * A new plugin was added to the plugins directory to help with testing. Notes: 1. The shuffle and fletcher32 filters are not part of the multifilter system. Misc. changes: 1. A debug module was added to libhdf5 to help catch error locations.
2020-02-17 03:59:33 +08:00
if(nfilters > 0) {
int k;
int pratt = 0;
for(k=0;k<nfilters;k++) {
NC_CHECK(nc_inq_var_filter_info(ncid, varid, filterids[k], &nparams, NULL));
if(nparams > 0) {
params = (unsigned int*)calloc(1,sizeof(unsigned int)*nparams);
NC_CHECK(nc_inq_var_filter_info(ncid, varid, filterids[k], &nbytes, params));
} else
params = NULL;
/* Use _Deflate if the first filter is zip */
if(k == 0 && filterids[k] == H5Z_FILTER_DEFLATE) {
pr_att_name(ncid, varp->name, NC_ATT_DEFLATE);
printf(" = %d", (int)params[0]);
pratt = 1;
usedeflateatt = 1;
nullfree(params); params = NULL;
continue;
}
if(pratt || k == 0) {
pr_att_name(ncid,varp->name,NC_ATT_FILTER);
printf(" = \"");
pratt = 0;
}
if(k > 0) printf("|");
printf("%u",filterids[k]);
if(nparams > 0) {
int i;
for(i=0;i<nparams;i++)
printf(",%u",params[i]);
}
nullfree(params); params = NULL;
}
if(!usedeflateatt) printf("\"");
printf(" ;\n");
2010-06-03 21:24:43 +08:00
}
Add support for multiple filters per variable. re: https://github.com/Unidata/netcdf-c/issues/1584 Support has been added for multiple filters per variable. This affects a number of components in netcdf. The new APIs are documented in NUG/filters.md. The primary changes are: * A set of new functions are provided (see __include/netcdf_filter.h__). - Obtain a list of the filters associated with a variable - Obtain the parameters for a specific filter. * The existing __nc_inq_var_filter__ function now returns info about the first defined filter. * The utilities (ncgen, ncdump, and nccopy) now support an extended format for specifying a sequence of filters. The general form is __<filter>|<filter>..._. * The ncdump **_Filter** attribute now dumps a list of all the filters associated with a variable using the above new format. * Filter specifications can now use a filter name instead of number for filters known to the netcdf library, which in turn is taken from the HDF5 filter registration page. * New errors are defined: NC_EFILTER and NC_ENOFILTER. The latter is returned if an attempt is made to access an unknown filter. * Internally, the dispatch table has been extended to add a function to handle all of the filter functions. * New, filter-related, tests were added to nc_test4. * A new plugin was added to the plugins directory to help with testing. Notes: 1. The shuffle and fletcher32 filters are not part of the multifilter system. Misc. changes: 1. A debug module was added to libhdf5 to help catch error locations.
2020-02-17 03:59:33 +08:00
if(filterids) free(filterids);
/* Finally, do Shuffle */
NC_CHECK( nc_inq_var_deflate(ncid, varid, &shuffle, NULL, NULL));
2010-06-03 21:24:43 +08:00
if(shuffle != NC_NOSHUFFLE) {
pr_att_name(ncid, varp->name, NC_ATT_SHUFFLE);
printf(" = \"true\" ;\n");
}
}
Add filter support to NCZarr Filter support has three goals: 1. Use the existing HDF5 filter implementations, 2. Allow filter metadata to be stored in the NumCodecs metadata format used by Zarr, 3. Allow filters to be used even when HDF5 is disabled Detailed usage directions are define in docs/filters.md. For now, the existing filter API is left in place. So filters are defined using ''nc_def_var_filter'' using the HDF5 style where the id and parameters are unsigned integers. This is a big change since filters affect many parts of the code. In the following, the terms "compressor" and "filter" and "codec" are generally used synonomously. ### Filter-Related Changes: * In order to support dynamic loading of shared filter libraries, a new library was added in the libncpoco directory; it helps to isolate dynamic loading across multiple platforms. * Provide a json parsing library for use by plugins; this is created by merging libdispatch/ncjson.c with include/ncjson.h. * Add a new _Codecs attribute to allow clients to see what codecs are being used; let ncdump -s print it out. * Provide special headers to help support compilation of HDF5 filters when HDF5 is not enabled: netcdf_filter_hdf5_build.h and netcdf_filter_build.h. * Add a number of new test to test the new nczarr filters. * Let ncgen parse _Codecs attribute, although it is ignored. ### Plugin directory changes: * Add support for the Blosc compressor; this is essential because it is the most common compressor used in Zarr datasets. This also necessitated adding a CMake FindBlosc.cmake file * Add NCZarr support for the big-four filters provided by HDF5: shuffle, fletcher32, deflate (zlib), and szip * Add a Codec defaulter (see docs/filters.md) for the big four filters. * Make plugins work with windows by properly adding __declspec declaration. ### Misc. Non-Filter Changes * Replace most uses of USE_NETCDF4 (deprecated) with USE_HDF5. * Improve support for caching * More fixes for path conversion code * Fix misc. 
memory leaks * Add new utility -- ncdump/ncpathcvt -- that does more or less the same thing as cygpath. * Add a number of new test to test the non-filter fixes. * Update the parsers * Convert most instances of '#ifdef _MSC_VER' to '#ifdef _WIN32'
2021-09-03 07:04:26 +08:00
/* _Codecs*/
{
int stat;
size_t len;
nc_type typeid;
stat = nc_inq_att(ncid,varid,NC_ATT_CODECS,&typeid,&len);
if(stat == NC_NOERR && typeid == NC_CHAR && len > 0) {
char* json = (char*)malloc(len+1);
if(json != NULL) {
stat = nc_get_att_text(ncid,varid,NC_ATT_CODECS,json);
if(stat == NC_NOERR) {
char* escapedjson = NULL;
pr_att_name(ncid, varp->name, NC_ATT_CODECS);
/* Escape the json */
escapedjson = escaped_string(json);
printf(" = \"%s\" ;\n",escapedjson);
free(escapedjson);
}
free(json);
}
}
}
2010-06-03 21:24:43 +08:00
/* _Checksum */
{
int fletcher32 = 0;
NC_CHECK( nc_inq_var_fletcher32(ncid, varid, &fletcher32) );
if(fletcher32 != 0) {
pr_att_name(ncid, varp->name, NC_ATT_CHECKSUM);
printf(" = \"true\" ;\n");
}
}
/* _Endianness */
if(varp->tinfo->size > 1) /* Endianness is meaningless for 1-byte types */
{
int endianness = 0;
NC_CHECK( nc_inq_var_endian(ncid, varid, &endianness) );
if (endianness != NC_ENDIAN_NATIVE) { /* NC_ENDIAN_NATIVE is the default */
2010-06-03 21:24:43 +08:00
pr_att_name(ncid, varp->name, NC_ATT_ENDIANNESS);
printf(" = ");
switch (endianness) {
case NC_ENDIAN_LITTLE:
printf("\"little\"");
break;
case NC_ENDIAN_BIG:
printf("\"big\"");
break;
default:
error("pr_att_specials: bad endianness: %d", endianness);
break;
}
printf(" ;\n");
}
}
{
int no_fill = 0;
/* Don't get the fill_value, it's set explicitly with
* _FillValue attribute, because nc_def_var_fill() creates a
* _FillValue attribute, if needed, and it's value gets
* displayed elsewhere as a normal (not special virtual)
* attribute. */
NC_CHECK( nc_inq_var_fill(ncid, varid, &no_fill, NULL) );
if(no_fill != 0) {
pr_att_name(ncid, varp->name, NC_ATT_NOFILL);
printf(" = \"true\" ;\n");
}
}
2010-06-03 21:24:43 +08:00
/* TODO: handle _Nbit when inquire function is available */
/* TODO: handle _ScaleOffset when inquire is available */
/* TODO: handle _Szip when szip inquire function is available */
}
#endif /* USE_NETCDF4 */
/*
 * Print selected hidden global attributes: NCPROPS, SUPERBLOCKATT and
 * ISNETCDF4ATT.  An attribute is printed only when both the inquiry
 * and the read succeed (and, for the two integer attributes, when its
 * length is 1); otherwise it is silently skipped.
 *
 * ncid - id the global attributes are looked up in
 * kind - format kind; not used at present
 */
static void
pr_att_hidden(
    int ncid,
    int kind
    )
{
    size_t attlen;
    int status;

    (void)kind;

    /* NCPROPS: read as text */
    status = nc_inq_att(ncid, NC_GLOBAL, NCPROPS, NULL, &attlen);
    if(NC_NOERR == status) {
        char *text = (char *)malloc(attlen + 1);
        if(!text)
            return; /* out of memory: the remaining attributes are skipped too */
        if(nc_get_att_text(ncid, NC_GLOBAL, NCPROPS, text) == NC_NOERR) {
            pr_att_name(ncid, "", NCPROPS);
            /* make sure it is null terminated */
            text[attlen] = '\0';
            printf(" = \"%s\" ;\n", text);
        }
        free(text);
    }

    /* _SuperblockVersion: a single integer */
    status = nc_inq_att(ncid, NC_GLOBAL, SUPERBLOCKATT, NULL, &attlen);
    if(NC_NOERR == status && 1 == attlen) {
        int version;
        if(nc_get_att_int(ncid, NC_GLOBAL, SUPERBLOCKATT, &version) == NC_NOERR) {
            pr_att_name(ncid, "", SUPERBLOCKATT);
            printf(" = %d ;\n", version);
        }
    }

    /* _IsNetcdf4: a single integer, printed normalized to 0 or 1 */
    status = nc_inq_att(ncid, NC_GLOBAL, ISNETCDF4ATT, NULL, &attlen);
    if(NC_NOERR == status && 1 == attlen) {
        int flag;
        if(nc_get_att_int(ncid, NC_GLOBAL, ISNETCDF4ATT, &flag) == NC_NOERR) {
            pr_att_name(ncid, "", ISNETCDF4ATT);
            printf(" = %d ;\n", flag != 0);
        }
    }
}
2010-06-03 21:24:43 +08:00
/*
 * Print a variable attribute for NcML
 *
 * ncid  - id of the file or group
 * varid - id of the variable, or NC_GLOBAL for a global attribute
 * ia    - attribute number
 *
 * NC_CHAR and NC_STRING attributes are printed without a type, through
 * pr_attx_string; for NC_STRING only the first string is printed.
 * Vlen, opaque and compound attributes are not handled yet and are
 * printed with an empty value.  Every other type is read as doubles
 * and formatted by pr_att_valsx.
 */
static void
pr_attx(
    int ncid,
    int varid,
    int ia
    )
{
    ncatt_t att;			/* attribute */
    char *attvals = NULL;
    int attvalslen = 0;

    NC_CHECK( nc_inq_attname(ncid, varid, ia, att.name) );
#ifdef USE_NETCDF4
    /* The NCPROPS attribute of the root group is only shown when both
     * special attributes and the props option were requested */
    if (ncid == getrootid(ncid)
        && varid == NC_GLOBAL
        && strcmp(att.name,NCPROPS)==0
        && (!formatting_specs.special_atts
            || !formatting_specs.xopt_props)
        )
        return;
#endif
    NC_CHECK( nc_inq_att(ncid, varid, att.name, &att.type, &att.len) );

    /* Put attribute values into a single string, with blanks in between */

    switch (att.type) {
    case NC_CHAR:
        attvals = (char *) emalloc(att.len + 1);
        attvalslen = (int) att.len;
        attvals[att.len] = '\0';
        NC_CHECK( nc_get_att_text(ncid, varid, att.name, attvals ) );
        break;
#ifdef USE_NETCDF4
    case NC_STRING: {
        /* TODO: this only prints first string value, need to handle
           multiple strings? */
        /* att.len counts strings, not characters, so the values must be
         * read with nc_get_att_string and the first one copied out */
        char **strings = (char **) emalloc((att.len + 1) * sizeof(char *));
        const char *first = "";

        strings[0] = NULL;
        if (att.len > 0) {
            NC_CHECK( nc_get_att_string(ncid, varid, att.name, strings) );
        }
        if (strings[0] != NULL)
            first = strings[0];
        attvalslen = (int) strlen(first);
        attvals = (char *) emalloc((size_t)attvalslen + 1);
        memcpy(attvals, first, (size_t)attvalslen + 1);
        if (att.len > 0) {
            NC_CHECK( nc_free_string(att.len, strings) );
        }
        free(strings);
        break;
    }
    case NC_VLEN:
        /* TODO */
        break;
    case NC_OPAQUE:
        /* TODO */
        break;
    case NC_COMPOUND:
        /* TODO */
        break;
#endif /* USE_NETCDF4 */
    default:
        att.vals = (double *) emalloc((att.len + 1) * sizeof(double));
        NC_CHECK( nc_get_att_double(ncid, varid, att.name, att.vals ) );
        attvalslen = PRIM_LEN * att.len; /* max chars for each value and blank separator */
        attvals = (char *) emalloc(attvalslen + 1);
        pr_att_valsx(att.type, att.len, att.vals, attvals, attvalslen);
        free(att.vals);
        break;
    }

    /* Don't output type for string attributes, since that's default type */
    if(att.type == NC_CHAR
#ifdef USE_NETCDF4
       || att.type == NC_STRING
#endif /* USE_NETCDF4 */
       ) {
        /* TODO: XML-ish escapes for special chars in names */
        printf ("%s <attribute name=\"%s\" value=",
                varid != NC_GLOBAL ? " " : "",
                att.name);
        /* print attvals as a string with XML escapes */
        pr_attx_string(att.name, attvalslen, attvals);
    } else {			/* non-string attribute */
        char att_type_name[NC_MAX_NAME + 1];
        get_type_name(ncid, att.type, att_type_name);
        /* TODO: print full type name with group prefix, when needed */
        printf ("%s <attribute name=\"%s\" type=\"%s\" value=\"",
                varid != NC_GLOBAL ? " " : "",
                att.name,
                att_type_name);
        /* attvals stays NULL for the types that are still TODO above */
        printf("%s\"", attvals != NULL ? attvals : "");
    }
    printf (" />\n");
    free (attvals);
}
/*
 * Print optional NcML attribute for a variable's shape: the names of
 * the variable's dimensions joined by single blanks, in the form
 * shape="...".  Nothing is printed when the variable has no
 * dimensions.
 */
static void
pr_shape(ncvar_t* varp, ncdim_t *dims)
{
    char *text;
    int capacity = 0;
    int k;

    if (0 == varp->ndims)
        return;

    /* One byte per name character plus one per name for the separator
     * (or, for the last name, the terminating NUL) */
    k = 0;
    while (k < varp->ndims) {
        capacity += strlen(dims[varp->dims[k]].name) + 1;
        k++;
    }

    text = (char *) emalloc(capacity + 1);
    text[0] = '\0';
    for (k = 0; k < varp->ndims; k++) {
        const char *sep = (k < varp->ndims-1) ? " " : "";
        /* TODO: XML-ish escapes for special chars in dim names */
        strlcat(text, dims[varp->dims[k]].name, capacity);
        strlcat(text, sep, capacity);
    }
    printf (" shape=\"%s\"", text);
    free(text);
}
#ifdef USE_NETCDF4
/*
 * Print an enum type declaration of the form
 *   <base type> enum <name> {<member> = <value>, ...} ;
 *
 * ncid   - id of the group the type is looked up in
 * typeid - id of the enum type
 *
 * Member values are read in the enum's base type and widened to a
 * signed 64-bit value for printing.
 * NOTE(review): NC_UINT64 values above INT64_MAX are therefore printed
 * as negative numbers; confirm what ncgen expects before changing that.
 */
static void
print_enum_type(int ncid, nc_type typeid) {
    char type_name[NC_MAX_NAME + 1];
    size_t type_size;
    nc_type base_nc_type;
    size_t type_nfields;
    int type_class;
    char base_type_name[NC_MAX_NAME + 1];
    size_t f;
    char memname[NC_MAX_NAME + 1];
    /* extra space for escapes, and punctuation */
#define SAFE_BUF_LEN 4*NC_MAX_NAME+30
    char safe_buf[SAFE_BUF_LEN];
    const char *delim;
    /* space for a member value of any primitive integer type; a union
     * lets each base type be read back through a member of its own type */
    union {
        signed char b;
        short s;
        int i;
        unsigned char ub;
        unsigned short us;
        unsigned int ui;
        int64_t i64;
        uint64_t u64;
    } data;
    char *esc_btn;
    char *esc_tn;
    char *esc_mn;
    int res;

    NC_CHECK( nc_inq_user_type(ncid, typeid, type_name, &type_size, &base_nc_type,
                               &type_nfields, &type_class) );

    get_type_name(ncid, base_nc_type, base_type_name);
    indent_out();
    esc_btn = escaped_name(base_type_name);
    esc_tn = escaped_name(type_name);
    res = snprintf(safe_buf, sizeof safe_buf, "%s enum %s {", esc_btn, esc_tn);
    assert(res >= 0 && (size_t)res < sizeof safe_buf);
    free(esc_btn);
    free(esc_tn);
    lput(safe_buf);

    delim = ", ";
    for (f = 0; f < type_nfields; f++) {
        int64_t memval = 0;

        /* the last member closes the declaration */
        if (f + 1 == type_nfields)
            delim = "} ;\n";
        data.i64 = 0;
        NC_CHECK( nc_inq_enum_member(ncid, typeid, (int)f, memname, &data) );
        switch (base_nc_type) {
        case NC_BYTE:
            /* NC_BYTE is signed regardless of the signedness of plain char */
            memval = data.b;
            break;
        case NC_SHORT:
            memval = data.s;
            break;
        case NC_INT:
            memval = data.i;
            break;
        case NC_UBYTE:
            memval = data.ub;
            break;
        case NC_USHORT:
            memval = data.us;
            break;
        case NC_UINT:
            memval = data.ui;
            break;
        case NC_INT64:
            memval = data.i64;
            break;
        case NC_UINT64:
            memval = (int64_t)data.u64;
            break;
        default:
            error("Bad base type for enum!");
            break;
        }
        esc_mn = escaped_name(memname);
        /* int64_t need not be long long, so convert to match %lld */
        res = snprintf(safe_buf, sizeof safe_buf, "%s = %lld%s", esc_mn,
                       (long long)memval, delim);
        assert(res >= 0 && (size_t)res < sizeof safe_buf);
        free(esc_mn);
        lput(safe_buf);
    }
    /* an enum without members still needs its closing brace */
    if (type_nfields == 0)
        lput("} ;\n");
}
/*
 * Print a user-defined type declaration.  The class of the type
 * (vlen, opaque, enum or compound) selects the form of the
 * declaration; enums are delegated to print_enum_type, any other
 * class is reported through error().
 *
 * ncid   - id of the group the type is looked up in
 * typeid - id of the user-defined type
 */
static void
print_ud_type(int ncid, nc_type typeid) {
    char name[NC_MAX_NAME + 1];
    char base_name[NC_MAX_NAME + 1];
    size_t nfields, size;
    nc_type base_type;
    int klass;

    NC_CHECK( nc_inq_user_type(ncid, typeid, name, &size, &base_type,
                               &nfields, &klass) );

    if (klass == NC_VLEN) {
        /* <base type>(*) <name> ; */
        /* TODO: don't bother getting base_name if
         * print_type_name looks it up anyway */
        get_type_name(ncid, base_type, base_name);
        indent_out();
        print_type_name(ncid, base_type);
        printf("(*) ");
        print_type_name(ncid, typeid);
        printf(" ;\n");
    } else if (klass == NC_OPAQUE) {
        /* opaque(<size>) <name> ; */
        indent_out();
        printf("opaque(%d) ", (int)size);
        print_type_name(ncid, typeid);
        printf(" ;\n");
    } else if (klass == NC_ENUM) {
        print_enum_type(ncid, typeid);
    } else if (klass == NC_COMPOUND) {
        char member_name[NC_MAX_NAME + 1];
        char member_type_name[NC_MAX_NAME + 1];
        size_t member_offset;
        nc_type member_type;
        int member_rank;
        int f;

        /* compound <name> { */
        indent_out();
        printf("compound ");
        print_type_name(ncid, typeid);
        printf(" {\n");

        /* one line per field: <type> <name>[(<dim>, ...)] ; */
        for (f = 0; f < nfields; f++) {
            NC_CHECK( nc_inq_compound_field(ncid, typeid, f, member_name,
                                            &member_offset, &member_type,
                                            &member_rank, NULL) );
            /* TODO: don't bother if member_type_name not needed here */
            get_type_name(ncid, member_type, member_type_name);
            indent_out();
            printf(" ");
            print_type_name(ncid, member_type);
            printf(" ");
            print_name(member_name);
            if (member_rank > 0) {
                int d;
                int *extents = (int *) emalloc((member_rank + 1) * sizeof(int));
                /* second query fetches only the dimension sizes */
                NC_CHECK( nc_inq_compound_field(ncid, typeid, f, NULL,
                                                NULL, NULL, NULL,
                                                extents) );
                printf("(");
                for (d = 0; d < member_rank; d++)
                    printf("%d%s", extents[d], d + 1 < member_rank ? ", " : ")");
                free(extents);
            }
            printf(" ;\n");
        }

        /* }; // <name> */
        indent_out();
        printf("}; // ");
        print_type_name(ncid, typeid);
        printf("\n");
    } else {
        error("Unknown class of user-defined type!");
    }
}
#endif /* USE_NETCDF4 */
/*
 * Determine the fill value of a variable and record it in vp.
 *
 * If the variable has a _FillValue attribute of the variable's own
 * type and of length 1, that value is used.  Otherwise the default
 * NC_FILL_* value of the variable's primitive type is used; byte
 * types, NC_NAT and user-defined types get no fill value at all.
 *
 * ncid  - id of the file or group containing the variable
 * varid - id of the variable
 * vp    - variable descriptor; type and tinfo->size are read,
 *         has_fillval and fillvalp are set.  fillvalp is NULL exactly
 *         when has_fillval is 0, otherwise it points to allocated
 *         storage holding one value of the variable's type.
 */
static void
get_fill_info(int ncid, int varid, ncvar_t *vp)
{
    ncatt_t att;			/* attribute */
    int nc_status;			/* return from netcdf calls */
    void *fillvalp = NULL;

    vp->has_fillval = 1; /* by default, but turn off for bytes */

    /* get _FillValue attribute */
    nc_status = nc_inq_att(ncid,varid,_FillValue,&att.type,&att.len);
    fillvalp = ecalloc(vp->tinfo->size + 1);
    if(nc_status == NC_NOERR &&
       att.type == vp->type && att.len == 1) {
        NC_CHECK(nc_get_att(ncid, varid, _FillValue, fillvalp));
    } else {
        switch (vp->type) {
        case NC_CHAR:
            *(char *)fillvalp = NC_FILL_CHAR;
            break;
        case NC_SHORT:
            *(short *)fillvalp = NC_FILL_SHORT;
            break;
        case NC_INT:
            *(int *)fillvalp = NC_FILL_INT;
            break;
        case NC_FLOAT:
            *(float *)fillvalp = NC_FILL_FLOAT;
            break;
        case NC_DOUBLE:
            *(double *)fillvalp = NC_FILL_DOUBLE;
            break;
        case NC_USHORT:
            *(unsigned short *)fillvalp = NC_FILL_USHORT;
            break;
        case NC_UINT:
            *(unsigned int *)fillvalp = NC_FILL_UINT;
            break;
        case NC_INT64:
            *(int64_t *)fillvalp = NC_FILL_INT64;
            break;
        case NC_UINT64:
            *(uint64_t *)fillvalp = NC_FILL_UINT64;
            break;
#ifdef USE_NETCDF4
        case NC_STRING: {
            /* The fill value is a pointer to a separately allocated
             * copy of the default fill string; emalloc is used so an
             * allocation failure is not followed by a write through
             * a null pointer. */
            size_t len = strlen(NC_FILL_STRING);
            char *s = (char *) emalloc(len + 1);
            memcpy(s, NC_FILL_STRING, len);
            s[len] = '\0';
            *((char **)fillvalp) = s;
        } break;
#endif /* USE_NETCDF4 */
        case NC_BYTE:
        case NC_UBYTE:
            /* don't do default fill-values for bytes, too risky */
            /* fallthrough */
        default:		/* no default fill values for NC_NAT
                                   or user-defined types */
            vp->has_fillval = 0;
            free(fillvalp);
            fillvalp = NULL;
            break;
        }
    }
    vp->fillvalp = fillvalp;
}
2011-09-22 07:10:03 +08:00
/* Recursively dump the contents of a group. (Only netcdf-4 format
 * files can have groups, so recursion will not take place for classic
 * format files.)
 *
 * ncid: id of open file (first call) or group (subsequent recursive calls)
 * path: file path name (first call)
 */
static void
do_ncdump_rec(int ncid, const char *path)
2010-06-03 21:24:43 +08:00
{
int ndims; /* number of dimensions */
int nvars; /* number of variables */
int ngatts; /* number of global attributes */
int xdimid; /* id of unlimited dimension */
int varid; /* variable id */
Regularize the scoping of dimensions This is a follow-on to pull request ````https://github.com/Unidata/netcdf-c/pull/1959````, which fixed up type scoping. The primary changes are to _nc\_inq\_dimid()_ and to ncdump. The _nc\_inq\_dimid()_ function is supposed to allow the name to be and FQN, but this apparently never got implemented. So if was modified to support FQNs. The ncdump program is supposed to output fully qualified dimension names in its generated CDL file under certain conditions. Suppose ncdump has a netcdf-4 file F with variable V, and V's parent group is G. For each dimension id D referenced by V, ncdump needs to determine whether to print its name as a simple name or as a fully qualified name (FQN). The algorithm is as follows: 1. Search up the tree of ancestor groups. 2. If one of those ancestor groups contains the dimid, then call it dimgrp. 3. If one of those ancestor groups contains a dim with the same name as the dimid, but with a different dimid, then record that as duplicate=true. 4. If dimgrp is defined and duplicate == false, then we do not need an fqn. 5. If dimgrp is defined and duplicate == true, then we do need an fqn to avoid incorrectly using the duplicate. 6. If dimgrp is undefined, then do a preorder breadth-first search of all the groups looking for the dimid. 7. If found, then use the fqn of the first found such dimension location. 8. If not found, then fail. Test case ncdump/test_scope.sh was modified to test the proper operation of ncdump and _nc\_inq\_dimid()_. Misc. Other Changes: * Fix nc_inq_ncid (NC4_inq_ncid actually) to return root group id if the name argument is NULL. * Modify _ncdump/printfqn_ to print out a dimid FQN; this supports verification that the resulting .nc files were properly created.
2021-06-01 05:51:12 +08:00
int rootncid; /* id of root group */
2010-06-03 21:24:43 +08:00
ncdim_t *dims; /* dimensions */
size_t *vdims=0; /* dimension sizes for a single variable */
ncvar_t var; /* variable */
int id; /* dimension number per variable */
int ia; /* attribute number */
int iv; /* variable number */
idnode_t* vlist = NULL; /* list for vars specified with -v option */
2010-06-03 21:24:43 +08:00
char type_name[NC_MAX_NAME + 1];
int kind; /* strings output differently for nc4 files */
char dim_name[NC_MAX_NAME + 1];
#ifdef USE_NETCDF4
int *dimids_grp; /* dimids of the dims in this group. */
int *unlimids; /* dimids of unlimited dimensions in this group */
int d_grp, ndims_grp;
int ntypes, *typeids;
int nunlim;
#else
int dimid; /* dimension id */
#endif /* USE_NETCDF4 */
2011-08-26 11:54:25 +08:00
int is_root = 1; /* true if ncid is root group or if netCDF-3 */
#ifdef USE_NETCDF4
if (nc_inq_grp_parent(ncid, NULL) != NC_ENOGRP)
is_root = 0;
#endif /* USE_NETCDF4 */
Regularize the scoping of dimensions This is a follow-on to pull request ````https://github.com/Unidata/netcdf-c/pull/1959````, which fixed up type scoping. The primary changes are to _nc\_inq\_dimid()_ and to ncdump. The _nc\_inq\_dimid()_ function is supposed to allow the name to be and FQN, but this apparently never got implemented. So if was modified to support FQNs. The ncdump program is supposed to output fully qualified dimension names in its generated CDL file under certain conditions. Suppose ncdump has a netcdf-4 file F with variable V, and V's parent group is G. For each dimension id D referenced by V, ncdump needs to determine whether to print its name as a simple name or as a fully qualified name (FQN). The algorithm is as follows: 1. Search up the tree of ancestor groups. 2. If one of those ancestor groups contains the dimid, then call it dimgrp. 3. If one of those ancestor groups contains a dim with the same name as the dimid, but with a different dimid, then record that as duplicate=true. 4. If dimgrp is defined and duplicate == false, then we do not need an fqn. 5. If dimgrp is defined and duplicate == true, then we do need an fqn to avoid incorrectly using the duplicate. 6. If dimgrp is undefined, then do a preorder breadth-first search of all the groups looking for the dimid. 7. If found, then use the fqn of the first found such dimension location. 8. If not found, then fail. Test case ncdump/test_scope.sh was modified to test the proper operation of ncdump and _nc\_inq\_dimid()_. Misc. Other Changes: * Fix nc_inq_ncid (NC4_inq_ncid actually) to return root group id if the name argument is NULL. * Modify _ncdump/printfqn_ to print out a dimid FQN; this supports verification that the resulting .nc files were properly created.
2021-06-01 05:51:12 +08:00
NC_CHECK(nc_inq_ncid(ncid,NULL,&rootncid)); /* get root group ncid */
2010-06-03 21:24:43 +08:00
/*
* If any vars were specified with -v option, get list of
* associated variable ids relative to this group. Assume vars
* specified with syntax like "grp1/grp2/varname" or
* "/grp1/grp2/varname" if they are in groups.
*/
if (formatting_specs.nlvars > 0) {
2011-09-22 07:10:03 +08:00
vlist = newidlist(); /* list for vars specified with -v option */
for (iv=0; iv < formatting_specs.nlvars; iv++) {
if(nc_inq_gvarid(ncid, formatting_specs.lvars[iv], &varid) == NC_NOERR)
2011-09-22 07:10:03 +08:00
idadd(vlist, varid);
2010-06-03 21:24:43 +08:00
}
}
#ifdef USE_NETCDF4
/* Are there any user defined types in this group? */
NC_CHECK( nc_inq_typeids(ncid, &ntypes, NULL) );
if (ntypes)
{
int t;
typeids = emalloc((ntypes + 1) * sizeof(int));
2010-06-03 21:24:43 +08:00
NC_CHECK( nc_inq_typeids(ncid, &ntypes, typeids) );
indent_out();
printf("types:\n");
indent_more();
for (t = 0; t < ntypes; t++)
{
print_ud_type(ncid, typeids[t]); /* print declaration of user-defined type */
}
indent_less();
free(typeids);
}
#endif /* USE_NETCDF4 */
/*
* get number of dimensions, number of variables, number of global
* atts, and dimension id of unlimited dimension, if any
*/
NC_CHECK( nc_inq(ncid, &ndims, &nvars, &ngatts, &xdimid) );
/* get dimension info */
dims = (ncdim_t *) emalloc((ndims + 1) * sizeof(ncdim_t));
if (ndims > 0) {
indent_out();
printf ("dimensions:\n");
}
#ifdef USE_NETCDF4
/* In netCDF-4 files, dimids will not be sequential because they
* may be defined in various groups, and we are only looking at one
* group at a time. */
/* Find the number of dimids defined in this group. */
NC_CHECK( nc_inq_ndims(ncid, &ndims_grp) );
dimids_grp = (int *)emalloc((ndims_grp + 1) * sizeof(int));
2010-06-03 21:24:43 +08:00
/* Find the dimension ids in this group. */
NC_CHECK( nc_inq_dimids(ncid, 0, dimids_grp, 0) );
/* Find the number of unlimited dimensions and get their IDs */
NC_CHECK( nc_inq_unlimdims(ncid, &nunlim, NULL) );
unlimids = (int *)emalloc((nunlim + 1) * sizeof(int));
NC_CHECK( nc_inq_unlimdims(ncid, &nunlim, unlimids) );
2010-06-03 21:24:43 +08:00
/* For each dimension defined in this group, get and print out info. */
for (d_grp = 0; d_grp < ndims_grp; d_grp++)
{
int dimid = dimids_grp[d_grp];
int is_unlimited = 0;
int uld;
int stat;
for (uld = 0; uld < nunlim; uld++) {
if(dimid == unlimids[uld]) {
is_unlimited = 1;
break;
}
2010-06-03 21:24:43 +08:00
}
stat = nc_inq_dim(ncid, dimid, dims[d_grp].name, &dims[d_grp].size);
if (stat == NC_EDIMSIZE && SIZEOF_SIZE_T < 8) {
error("dimension \"%s\" too large for 32-bit platform, try 64-bit version", dims[d_grp].name);
} else {
NC_CHECK (stat);
}
indent_out();
printf ("\t");
print_name(dims[d_grp].name);
printf (" = ");
if(SIZEOF_SIZE_T >= 8) {
if (is_unlimited) {
printf ("UNLIMITED ; // (%lu currently)\n",
2010-06-03 21:24:43 +08:00
(unsigned long)dims[d_grp].size);
} else {
printf ("%lu ;\n", (unsigned long)dims[d_grp].size);
}
} else { /* 32-bit platform */
if (is_unlimited) {
printf ("UNLIMITED ; // (%u currently)\n",
2010-06-03 21:24:43 +08:00
(unsigned int)dims[d_grp].size);
} else {
printf ("%u ;\n", (unsigned int)dims[d_grp].size);
}
}
}
if(unlimids)
free(unlimids);
if(dimids_grp)
free(dimids_grp);
#else /* not using netCDF-4 */
for (dimid = 0; dimid < ndims; dimid++) {
NC_CHECK( nc_inq_dim(ncid, dimid, dims[dimid].name, &dims[dimid].size) );
indent_out();
printf ("\t");
print_name(dims[dimid].name);
printf (" = ");
if (dimid == xdimid) {
printf ("UNLIMITED ; // (%u currently)\n",
2010-06-03 21:24:43 +08:00
(unsigned int)dims[dimid].size);
} else {
printf ("%llu ;\n", (unsigned long long)dims[dimid].size);
2010-06-03 21:24:43 +08:00
}
}
#endif /* USE_NETCDF4 */
if (nvars > 0) {
indent_out();
printf ("variables:\n");
}
/* Because netCDF-4 can have a string attribute with multiple
 * string values, we can't output strings with embedded newlines
 * as what look like multiple strings, as we do for classic and
 * 64-bit offset and cdf5 files. So we need to know the format
 * (kind) of the file being dumped to know how to print strings
 * with embedded newlines. */
NC_CHECK( nc_inq_format(ncid, &kind) );
2010-06-03 21:24:43 +08:00
memset((void*)&var,0,sizeof(var));
Fix more memory leaks in netcdf-c library This is a follow up to PR https://github.com/Unidata/netcdf-c/pull/1173 Sorry that it is so big, but leak suppression can be complex. This PR fixes all remaining memory leaks -- as determined by -fsanitize=address, and with the exceptions noted below. Unfortunately. there remains a significant leak that I cannot solve. It involves vlens, and it is unclear if the leak is occurring in the netcdf-c library or the HDF5 library. I have added a check_PROGRAM to the ncdump directory to show the problem. The program is called tst_vlen_demo.c To exercise it, build the netcdf library with -fsanitize=address enabled. Then go into ncdump and do a "make clean check". This should build tst_vlen_demo without actually executing it. Then do the command "./tst_vlen_demo" to see the output of the memory checker. Note the the lost malloc is deep in the HDF5 library (in H5Tvlen.c). I am temporarily working around this error in the following way. 1. I modified several test scripts to not execute known vlen tests that fail as described above. 2. Added an environment variable called NC_VLEN_NOTEST. If set, then those specific tests are suppressed. This should mean that the --disable-utilities option to ./configure should not need to be set to get a memory leak clean build. This should allow for detection of any new leaks. Note: I used an environment variable rather than a ./configure option to control the vlen tests. This is because it is temporary (I hope) and because it is a bit tricky for shell scripts to access ./configure options. Finally, as before, this only been tested with netcdf-4 and hdf5 support.
2018-11-16 01:00:38 +08:00
/* For each var, get and print out info. */
2010-06-03 21:24:43 +08:00
for (varid = 0; varid < nvars; varid++) {
NC_CHECK( nc_inq_varndims(ncid, varid, &var.ndims) );
if(var.dims != NULL) free(var.dims);
var.dims = (int *) emalloc((var.ndims + 1) * sizeof(int));
NC_CHECK( nc_inq_var(ncid, varid, var.name, &var.type, 0,
var.dims, &var.natts) );
/* TODO: don't bother if type name not needed here */
get_type_name(ncid, var.type, type_name);
var.tinfo = get_typeinfo(var.type);
indent_out();
/* printf ("\t%s %s", type_name, var.name); */
printf ("\t");
/* TODO: if duplicate type name and not just inherited, print
* full type name. */
print_type_name (ncid, var.type);
printf (" ");
print_name (var.name);
if (var.ndims > 0)
printf ("(");
for (id = 0; id < var.ndims; id++) {
Regularize the scoping of dimensions This is a follow-on to pull request ````https://github.com/Unidata/netcdf-c/pull/1959````, which fixed up type scoping. The primary changes are to _nc\_inq\_dimid()_ and to ncdump. The _nc\_inq\_dimid()_ function is supposed to allow the name to be and FQN, but this apparently never got implemented. So if was modified to support FQNs. The ncdump program is supposed to output fully qualified dimension names in its generated CDL file under certain conditions. Suppose ncdump has a netcdf-4 file F with variable V, and V's parent group is G. For each dimension id D referenced by V, ncdump needs to determine whether to print its name as a simple name or as a fully qualified name (FQN). The algorithm is as follows: 1. Search up the tree of ancestor groups. 2. If one of those ancestor groups contains the dimid, then call it dimgrp. 3. If one of those ancestor groups contains a dim with the same name as the dimid, but with a different dimid, then record that as duplicate=true. 4. If dimgrp is defined and duplicate == false, then we do not need an fqn. 5. If dimgrp is defined and duplicate == true, then we do need an fqn to avoid incorrectly using the duplicate. 6. If dimgrp is undefined, then do a preorder breadth-first search of all the groups looking for the dimid. 7. If found, then use the fqn of the first found such dimension location. 8. If not found, then fail. Test case ncdump/test_scope.sh was modified to test the proper operation of ncdump and _nc\_inq\_dimid()_. Misc. Other Changes: * Fix nc_inq_ncid (NC4_inq_ncid actually) to return root group id if the name argument is NULL. * Modify _ncdump/printfqn_ to print out a dimid FQN; this supports verification that the resulting .nc files were properly created.
2021-06-01 05:51:12 +08:00
/* Get the base name of the dimension */
2010-06-03 21:24:43 +08:00
NC_CHECK( nc_inq_dimname(ncid, var.dims[id], dim_name) );
#ifdef USE_NETCDF4
Regularize the scoping of dimensions This is a follow-on to pull request ````https://github.com/Unidata/netcdf-c/pull/1959````, which fixed up type scoping. The primary changes are to _nc\_inq\_dimid()_ and to ncdump. The _nc\_inq\_dimid()_ function is supposed to allow the name to be and FQN, but this apparently never got implemented. So if was modified to support FQNs. The ncdump program is supposed to output fully qualified dimension names in its generated CDL file under certain conditions. Suppose ncdump has a netcdf-4 file F with variable V, and V's parent group is G. For each dimension id D referenced by V, ncdump needs to determine whether to print its name as a simple name or as a fully qualified name (FQN). The algorithm is as follows: 1. Search up the tree of ancestor groups. 2. If one of those ancestor groups contains the dimid, then call it dimgrp. 3. If one of those ancestor groups contains a dim with the same name as the dimid, but with a different dimid, then record that as duplicate=true. 4. If dimgrp is defined and duplicate == false, then we do not need an fqn. 5. If dimgrp is defined and duplicate == true, then we do need an fqn to avoid incorrectly using the duplicate. 6. If dimgrp is undefined, then do a preorder breadth-first search of all the groups looking for the dimid. 7. If found, then use the fqn of the first found such dimension location. 8. If not found, then fail. Test case ncdump/test_scope.sh was modified to test the proper operation of ncdump and _nc\_inq\_dimid()_. Misc. Other Changes: * Fix nc_inq_ncid (NC4_inq_ncid actually) to return root group id if the name argument is NULL. * Modify _ncdump/printfqn_ to print out a dimid FQN; this supports verification that the resulting .nc files were properly created.
2021-06-01 05:51:12 +08:00
/* This dim may be defined in an ancestor group, so look up the
 * group that defines this dimid; the fully qualified name is
 * printed only when the simple name would be ambiguous or the
 * dimension is not found in the current group or its ancestors.
 * Subtlety: The following code block is needed because
 * nc_inq_dimname() currently returns only a simple dimension
 * name, without a prefix identifying the group it came from.
 * That's OK unless the dimid identifies a dimension in an
 * ancestor group that has the same simple name as a
 * dimension in the current group (or some intermediate
 * group), in which case the simple name is ambiguous. This
 * code tests for that case and provides an absolute dimname
 * only in the case where a simple name would be
 * ambiguous.
 * The algorithm is as follows:
 * 1. Search up the tree of ancestor groups, starting with the current group.
 * 2. If one of those groups contains the dimid, then call it dimgrp.
 * 3. If one of those groups contains a dim with the same name as the dimid,
 *    but with a different dimid, then record that as duplicate=true.
 * 4. If dimgrp is defined and duplicate == false, then we do not need an fqn.
 * 5. If dimgrp is defined and duplicate == true, then we do need an fqn to avoid using the duplicate.
 * 6. If dimgrp is undefined, then do a preorder breadth-first search of all the groups looking for the
 *    dimid.
 * 7. If found, then use the fqn of that dimension location.
 * 8. If not found, then signal NC_EBADDIM.
 */
int target_dimid, dimgrp, duplicate, stopsearch, usefqn;
target_dimid = var.dims[id];
dimgrp = ncid; /* start with the parent group of the variable */
duplicate = 0;
usefqn = 0;
/* Walk up the ancestor groups */
for(stopsearch=0;stopsearch==0;) {
int tmpid;
int localdimid;
int ret = NC_NOERR;
ret = nc_inq_dimid(dimgrp,dim_name,&localdimid);
switch (ret) {
case NC_NOERR: /* We have a name match */
if(localdimid == target_dimid) stopsearch = 1; /* 1 means stop because found */
else duplicate = 1;
break;
case NC_EBADDIM:
break; /* no match at all */
default: NC_CHECK(ret);
}
Regularize the scoping of dimensions This is a follow-on to pull request ````https://github.com/Unidata/netcdf-c/pull/1959````, which fixed up type scoping. The primary changes are to _nc\_inq\_dimid()_ and to ncdump. The _nc\_inq\_dimid()_ function is supposed to allow the name to be and FQN, but this apparently never got implemented. So if was modified to support FQNs. The ncdump program is supposed to output fully qualified dimension names in its generated CDL file under certain conditions. Suppose ncdump has a netcdf-4 file F with variable V, and V's parent group is G. For each dimension id D referenced by V, ncdump needs to determine whether to print its name as a simple name or as a fully qualified name (FQN). The algorithm is as follows: 1. Search up the tree of ancestor groups. 2. If one of those ancestor groups contains the dimid, then call it dimgrp. 3. If one of those ancestor groups contains a dim with the same name as the dimid, but with a different dimid, then record that as duplicate=true. 4. If dimgrp is defined and duplicate == false, then we do not need an fqn. 5. If dimgrp is defined and duplicate == true, then we do need an fqn to avoid incorrectly using the duplicate. 6. If dimgrp is undefined, then do a preorder breadth-first search of all the groups looking for the dimid. 7. If found, then use the fqn of the first found such dimension location. 8. If not found, then fail. Test case ncdump/test_scope.sh was modified to test the proper operation of ncdump and _nc\_inq\_dimid()_. Misc. Other Changes: * Fix nc_inq_ncid (NC4_inq_ncid actually) to return root group id if the name argument is NULL. * Modify _ncdump/printfqn_ to print out a dimid FQN; this supports verification that the resulting .nc files were properly created.
2021-06-01 05:51:12 +08:00
if(stopsearch != 0) break; /* no need to continue */
/* move to ancestor group */
ret = nc_inq_grp_parent(dimgrp,&tmpid);
switch(ret) {
case NC_NOERR:
dimgrp = tmpid;
break;
case NC_ENOGRP:
/* we processed the root, so try the breadth-first search */
stopsearch = -1; /* -1 means we hit the root group but did not find it */
rootncid = dimgrp;
break;
default: NC_CHECK(ret);
}
Regularize the scoping of dimensions This is a follow-on to pull request ````https://github.com/Unidata/netcdf-c/pull/1959````, which fixed up type scoping. The primary changes are to _nc\_inq\_dimid()_ and to ncdump. The _nc\_inq\_dimid()_ function is supposed to allow the name to be and FQN, but this apparently never got implemented. So if was modified to support FQNs. The ncdump program is supposed to output fully qualified dimension names in its generated CDL file under certain conditions. Suppose ncdump has a netcdf-4 file F with variable V, and V's parent group is G. For each dimension id D referenced by V, ncdump needs to determine whether to print its name as a simple name or as a fully qualified name (FQN). The algorithm is as follows: 1. Search up the tree of ancestor groups. 2. If one of those ancestor groups contains the dimid, then call it dimgrp. 3. If one of those ancestor groups contains a dim with the same name as the dimid, but with a different dimid, then record that as duplicate=true. 4. If dimgrp is defined and duplicate == false, then we do not need an fqn. 5. If dimgrp is defined and duplicate == true, then we do need an fqn to avoid incorrectly using the duplicate. 6. If dimgrp is undefined, then do a preorder breadth-first search of all the groups looking for the dimid. 7. If found, then use the fqn of the first found such dimension location. 8. If not found, then fail. Test case ncdump/test_scope.sh was modified to test the proper operation of ncdump and _nc\_inq\_dimid()_. Misc. Other Changes: * Fix nc_inq_ncid (NC4_inq_ncid actually) to return root group id if the name argument is NULL. * Modify _ncdump/printfqn_ to print out a dimid FQN; this supports verification that the resulting .nc files were properly created.
2021-06-01 05:51:12 +08:00
}
assert(stopsearch != 0);
if(stopsearch == 1) {
/* We found it; do we need to use fqn */
usefqn = duplicate;
} else { /* stopsearch == -1 */
/* do the whole-tree search */
usefqn = 1;
NC_CHECK(searchgrouptreedim(rootncid,target_dimid,&dimgrp));
/* group containing target dimid is in group dimgrp */
}
if(usefqn) {
/* use fully qualified name (fqn) for the dimension name by prefixing dimname
with group name */
size_t len;
char *grpfqn = NULL; /* the group fqn */
NC_CHECK( nc_inq_grpname_full(dimgrp, &len, NULL) );
grpfqn = emalloc(len + 1);
NC_CHECK( nc_inq_grpname_full(dimgrp, &len, grpfqn) );
print_name (grpfqn);
if(strcmp("/", grpfqn) != 0) /* not the root group */
printf("/"); /* ensure a trailing slash */
free(grpfqn);
}
#endif /*USE_NETCDF4*/
print_name (dim_name);
printf ("%s", id < var.ndims-1 ? ", " : RPAREN);
2010-06-03 21:24:43 +08:00
}
printf (" ;\n");
/* print variable attributes */
for (ia = 0; ia < var.natts; ia++) { /* print ia-th attribute */
pr_att(ncid, kind, varid, var.name, ia);
}
#ifdef USE_NETCDF4
/* Print special (virtual) attributes, if option specified */
if (formatting_specs.special_atts) {
2010-06-03 21:24:43 +08:00
pr_att_specials(ncid, kind, varid, &var);
}
#endif /* USE_NETCDF4 */
}
if (ngatts > 0 || formatting_specs.special_atts) {
2010-06-03 21:24:43 +08:00
printf ("\n");
indent_out();
2011-08-26 11:54:25 +08:00
if (is_root)
2010-06-03 21:24:43 +08:00
printf("// global attributes:\n");
else
printf("// group attributes:\n");
}
for (ia = 0; ia < ngatts; ia++) { /* print ia-th global attribute */
pr_att(ncid, kind, NC_GLOBAL, "", ia);
}
if (is_root && formatting_specs.special_atts) { /* output special attribute
2010-06-03 21:24:43 +08:00
* for format variant */
pr_att_hidden(ncid, kind);
2010-06-03 21:24:43 +08:00
pr_att_global_format(ncid, kind);
}
fflush(stdout);
2011-09-22 07:10:03 +08:00
/* output variable data, unless "-h" option specified header only
* or this group is not in list of groups specified by "-g"
* option */
if (! formatting_specs.header_only &&
group_wanted(ncid, formatting_specs.nlgrps, formatting_specs.grpids) ) {
2010-06-03 21:24:43 +08:00
if (nvars > 0) {
indent_out();
printf ("data:\n");
}
for (varid = 0; varid < nvars; varid++) {
int no_data;
/* if var list specified, test for membership */
2011-09-22 07:10:03 +08:00
if (formatting_specs.nlvars > 0 && ! idmember(vlist, varid))
2010-06-03 21:24:43 +08:00
continue;
NC_CHECK( nc_inq_varndims(ncid, varid, &var.ndims) );
if(var.dims != NULL) free(var.dims);
var.dims = (int *) emalloc((var.ndims + 1) * sizeof(int));
NC_CHECK( nc_inq_var(ncid, varid, var.name, &var.type, 0,
var.dims, &var.natts) );
var.tinfo = get_typeinfo(var.type);
/* If coords-only option specified, don't get data for
* non-coordinate vars */
if (formatting_specs.coord_vals && !iscoordvar(ncid,varid)) {
2010-06-03 21:24:43 +08:00
continue;
}
/* Collect variable's dim sizes */
if (vdims) {
free(vdims);
vdims = 0;
}
vdims = (size_t *) emalloc((var.ndims + 1) * SIZEOF_SIZE_T);
no_data = 0;
for (id = 0; id < var.ndims; id++) {
size_t len;
NC_CHECK( nc_inq_dimlen(ncid, var.dims[id], &len) );
if(len == 0) {
no_data = 1;
}
vdims[id] = len;
}
/* Don't get data for record variables if no records have
* been written yet */
if (no_data) {
free(vdims);
vdims = 0;
continue;
}
get_fill_info(ncid, varid, &var); /* sets has_fillval, fillvalp mmbrs */
if(var.timeinfo != NULL) {
if(var.timeinfo->units) free(var.timeinfo->units);
free(var.timeinfo);
}
get_timeinfo(ncid, varid, &var); /* sets has_timeval, timeinfo mmbrs */
/* printf format used to print each value */
var.fmt = get_fmt(ncid, varid, var.type);
var.locid = ncid;
set_tostring_func(&var);
if (vardata(&var, vdims, ncid, varid) == -1) {
2010-06-03 21:24:43 +08:00
error("can't output data for variable %s", var.name);
goto done;
}
Fix more memory leaks in netcdf-c library This is a follow up to PR https://github.com/Unidata/netcdf-c/pull/1173 Sorry that it is so big, but leak suppression can be complex. This PR fixes all remaining memory leaks -- as determined by -fsanitize=address, and with the exceptions noted below. Unfortunately. there remains a significant leak that I cannot solve. It involves vlens, and it is unclear if the leak is occurring in the netcdf-c library or the HDF5 library. I have added a check_PROGRAM to the ncdump directory to show the problem. The program is called tst_vlen_demo.c To exercise it, build the netcdf library with -fsanitize=address enabled. Then go into ncdump and do a "make clean check". This should build tst_vlen_demo without actually executing it. Then do the command "./tst_vlen_demo" to see the output of the memory checker. Note the the lost malloc is deep in the HDF5 library (in H5Tvlen.c). I am temporarily working around this error in the following way. 1. I modified several test scripts to not execute known vlen tests that fail as described above. 2. Added an environment variable called NC_VLEN_NOTEST. If set, then those specific tests are suppressed. This should mean that the --disable-utilities option to ./configure should not need to be set to get a memory leak clean build. This should allow for detection of any new leaks. Note: I used an environment variable rather than a ./configure option to control the vlen tests. This is because it is temporary (I hope) and because it is a bit tricky for shell scripts to access ./configure options. Finally, as before, this only been tested with netcdf-4 and hdf5 support.
2018-11-16 01:00:38 +08:00
if(var.fillvalp != NULL)
Fix various problem around VLEN's re: https://github.com/Unidata/netcdf-c/issues/541 re: https://github.com/Unidata/netcdf-c/issues/1208 re: https://github.com/Unidata/netcdf-c/issues/2078 re: https://github.com/Unidata/netcdf-c/issues/2041 re: https://github.com/Unidata/netcdf-c/issues/2143 For a long time, there have been known problems with the management of complex types containing VLENs. This also involves the string type because it is stored as a VLEN of chars. This PR (mostly) fixes this problem. But note that it adds new functions to netcdf.h (see below) and this may require bumping the .so number. These new functions can be removed, if desired, in favor of functions in netcdf_aux.h, but netcdf.h seems the better place for them because they are intended as alternatives to the nc_free_vlen and nc_free_string functions already in netcdf.h. The term complex type refers to any type that directly or transitively references a VLEN type. So an array of VLENS, a compound with a VLEN field, and so on. In order to properly handle instances of these complex types, it is necessary to have function that can recursively walk instances of such types to perform various actions on them. The term "deep" is also used to mean recursive. At the moment, the two operations needed by the netcdf library are: * free'ing an instance of the complex type * copying an instance of the complex type. The current library does only shallow free and shallow copy of complex types. This means that only the top level is properly free'd or copied, but deep internal blocks in the instance are not touched. Note that the term "vector" will be used to mean a contiguous (in memory) sequence of instances of some type. Given an array with, say, dimensions 2 X 3 X 4, this will be stored in memory as a vector of length 2*3*4=24 instances. The use cases are primarily these. ## nc_get_vars Suppose one is reading a vector of instances using nc_get_vars (or nc_get_vara or nc_get_var, etc.). 
These functions will return the vector in the top-level memory provided. All interior blocks (form nested VLEN or strings) will have been dynamically allocated. After using this vector of instances, it is necessary to free (aka reclaim) the dynamically allocated memory, otherwise a memory leak occurs. So, the recursive reclaim function is used to walk the returned instance vector and do a deep reclaim of the data. Currently functions are defined in netcdf.h that are supposed to handle this: nc_free_vlen(), nc_free_vlens(), and nc_free_string(). Unfortunately, these functions only do a shallow free, so deeply nested instances are not properly handled by them. Note that internally, the provided data is immediately written so there is no need to copy it. But the caller may need to reclaim the data it passed into the function. ## nc_put_att Suppose one is writing a vector of instances as the data of an attribute using, say, nc_put_att. Internally, the incoming attribute data must be copied and stored so that changes/reclamation of the input data will not affect the attribute. Again, the code inside the netcdf library does only shallow copying rather than deep copy. As a result, one sees effects such as described in Github Issue https://github.com/Unidata/netcdf-c/issues/2143. Also, after defining the attribute, it may be necessary for the user to free the data that was provided as input to nc_put_att(). ## nc_get_att Suppose one is reading a vector of instances as the data of an attribute using, say, nc_get_att. Internally, the existing attribute data must be copied and returned to the caller, and the caller is responsible for reclaiming the returned data. Again, the code inside the netcdf library does only shallow copying rather than deep copy. So this can lead to memory leaks and errors because the deep data is shared between the library and the user. # Solution The solution is to build properly recursive reclaim and copy functions and use those as needed. 
These recursive functions are defined in libdispatch/dinstance.c and their signatures are defined in include/netcdf.h. For back compatibility, corresponding "ncaux_XXX" functions are defined in include/netcdf_aux.h. ```` int nc_reclaim_data(int ncid, nc_type xtypeid, void* memory, size_t count); int nc_reclaim_data_all(int ncid, nc_type xtypeid, void* memory, size_t count); int nc_copy_data(int ncid, nc_type xtypeid, const void* memory, size_t count, void* copy); int nc_copy_data_all(int ncid, nc_type xtypeid, const void* memory, size_t count, void** copyp); ```` There are two variants. The first two, nc_reclaim_data() and nc_copy_data(), assume the top-level vector is managed by the caller. For reclaim, this is so the user can use, for example, a statically allocated vector. For copy, it assumes the user provides the space into which the copy is stored. The second two, nc_reclaim_data_all() and nc_copy_data_all(), allows the functions to manage the top-level. So for nc_reclaim_data_all, the top level is assumed to be dynamically allocated and will be free'd by nc_reclaim_data_all(). The nc_copy_data_all() function will allocate the top level and return a pointer to it to the user. The user can later pass that pointer to nc_reclaim_data_all() to reclaim the instance(s). # Internal Changes The netcdf-c library internals are changed to use the proper reclaim and copy functions. It turns out that the places where these functions are needed is quite pervasive in the netcdf-c library code. Using these functions also allows some simplification of the code since the stdata and vldata fields of NC_ATT_INFO are no longer needed. Currently this is commented out using the SEPDATA \#define macro. When any bugs are largely fixed, all this code will be removed. # Known Bugs 1. There is still one known failure that has not been solved. All the failures revolve around some variant of this .cdl file. The proximate cause of failure is the use of a VLEN FillValue. 
```` netcdf x { types: float(*) row_of_floats ; dimensions: m = 5 ; variables: row_of_floats ragged_array(m) ; row_of_floats ragged_array:_FillValue = {-999} ; data: ragged_array = {10, 11, 12, 13, 14}, {20, 21, 22, 23}, {30, 31, 32}, {40, 41}, _ ; } ```` When a solution is found, I will either add it to this PR or post a new PR. # Related Changes * Mark nc_free_vlen(s) as deprecated in favor of ncaux_reclaim_data. * Remove the --enable-unfixed-memory-leaks option. * Remove the NC_VLENS_NOTEST code that suppresses some vlen tests. * Document this change in docs/internal.md * Disable the tst_vlen_data test in ncdump/tst_nccopy4.sh. * Mark types as fixed size or not (transitively) to optimize the reclaim and copy functions. # Misc. Changes * Make Doxygen process libdispatch/daux.c * Make sure the NC_ATT_INFO_T.container field is set.
2022-01-09 09:30:00 +08:00
{NC_CHECK(nc_reclaim_data_all(ncid,var.tinfo->tid,var.fillvalp,1)); var.fillvalp = NULL;}
2010-06-03 21:24:43 +08:00
}
if (vdims) {
free(vdims);
vdims = 0;
}
}
#ifdef USE_NETCDF4
/* For netCDF-4 compiles, check to see if the file has any
* groups. If it does, this function is called recursively on each
* of them. */
{
int g, numgrps, *ncids;
char group_name[NC_MAX_NAME + 1];
/* See how many groups there are. */
NC_CHECK( nc_inq_grps(ncid, &numgrps, NULL) );
2010-06-03 21:24:43 +08:00
/* Allocate memory to hold the list of group ids. */
ncids = emalloc((numgrps + 1) * sizeof(int));
2010-06-03 21:24:43 +08:00
/* Get the list of group ids. */
NC_CHECK( nc_inq_grps(ncid, NULL, ncids) );
2010-06-03 21:24:43 +08:00
/* Call this function for each group. */
for (g = 0; g < numgrps; g++)
{
NC_CHECK( nc_inq_grpname(ncids[g], group_name) );
printf ("\n");
indent_out();
/* printf ("group: %s {\n", group_name); */
printf ("group: ");
print_name (group_name);
printf (" {\n");
indent_more();
do_ncdump_rec(ncids[g], NULL);
2010-06-03 21:24:43 +08:00
indent_out();
/* printf ("} // group %s\n", group_name); */
printf ("} // group ");
print_name (group_name);
printf ("\n");
indent_less();
}
2010-06-03 21:24:43 +08:00
free(ncids);
}
#endif /* USE_NETCDF4 */
done:
if(var.dims != NULL) free(var.dims);
Fix more memory leaks in netcdf-c library This is a follow up to PR https://github.com/Unidata/netcdf-c/pull/1173 Sorry that it is so big, but leak suppression can be complex. This PR fixes all remaining memory leaks -- as determined by -fsanitize=address, and with the exceptions noted below. Unfortunately. there remains a significant leak that I cannot solve. It involves vlens, and it is unclear if the leak is occurring in the netcdf-c library or the HDF5 library. I have added a check_PROGRAM to the ncdump directory to show the problem. The program is called tst_vlen_demo.c To exercise it, build the netcdf library with -fsanitize=address enabled. Then go into ncdump and do a "make clean check". This should build tst_vlen_demo without actually executing it. Then do the command "./tst_vlen_demo" to see the output of the memory checker. Note the the lost malloc is deep in the HDF5 library (in H5Tvlen.c). I am temporarily working around this error in the following way. 1. I modified several test scripts to not execute known vlen tests that fail as described above. 2. Added an environment variable called NC_VLEN_NOTEST. If set, then those specific tests are suppressed. This should mean that the --disable-utilities option to ./configure should not need to be set to get a memory leak clean build. This should allow for detection of any new leaks. Note: I used an environment variable rather than a ./configure option to control the vlen tests. This is because it is temporary (I hope) and because it is a bit tricky for shell scripts to access ./configure options. Finally, as before, this only been tested with netcdf-4 and hdf5 support.
2018-11-16 01:00:38 +08:00
if(var.fillvalp != NULL) {
/* Release any data hanging off of fillvalp */
Fix various problem around VLEN's re: https://github.com/Unidata/netcdf-c/issues/541 re: https://github.com/Unidata/netcdf-c/issues/1208 re: https://github.com/Unidata/netcdf-c/issues/2078 re: https://github.com/Unidata/netcdf-c/issues/2041 re: https://github.com/Unidata/netcdf-c/issues/2143 For a long time, there have been known problems with the management of complex types containing VLENs. This also involves the string type because it is stored as a VLEN of chars. This PR (mostly) fixes this problem. But note that it adds new functions to netcdf.h (see below) and this may require bumping the .so number. These new functions can be removed, if desired, in favor of functions in netcdf_aux.h, but netcdf.h seems the better place for them because they are intended as alternatives to the nc_free_vlen and nc_free_string functions already in netcdf.h. The term complex type refers to any type that directly or transitively references a VLEN type. So an array of VLENS, a compound with a VLEN field, and so on. In order to properly handle instances of these complex types, it is necessary to have function that can recursively walk instances of such types to perform various actions on them. The term "deep" is also used to mean recursive. At the moment, the two operations needed by the netcdf library are: * free'ing an instance of the complex type * copying an instance of the complex type. The current library does only shallow free and shallow copy of complex types. This means that only the top level is properly free'd or copied, but deep internal blocks in the instance are not touched. Note that the term "vector" will be used to mean a contiguous (in memory) sequence of instances of some type. Given an array with, say, dimensions 2 X 3 X 4, this will be stored in memory as a vector of length 2*3*4=24 instances. The use cases are primarily these. ## nc_get_vars Suppose one is reading a vector of instances using nc_get_vars (or nc_get_vara or nc_get_var, etc.). 
These functions will return the vector in the top-level memory provided. All interior blocks (form nested VLEN or strings) will have been dynamically allocated. After using this vector of instances, it is necessary to free (aka reclaim) the dynamically allocated memory, otherwise a memory leak occurs. So, the recursive reclaim function is used to walk the returned instance vector and do a deep reclaim of the data. Currently functions are defined in netcdf.h that are supposed to handle this: nc_free_vlen(), nc_free_vlens(), and nc_free_string(). Unfortunately, these functions only do a shallow free, so deeply nested instances are not properly handled by them. Note that internally, the provided data is immediately written so there is no need to copy it. But the caller may need to reclaim the data it passed into the function. ## nc_put_att Suppose one is writing a vector of instances as the data of an attribute using, say, nc_put_att. Internally, the incoming attribute data must be copied and stored so that changes/reclamation of the input data will not affect the attribute. Again, the code inside the netcdf library does only shallow copying rather than deep copy. As a result, one sees effects such as described in Github Issue https://github.com/Unidata/netcdf-c/issues/2143. Also, after defining the attribute, it may be necessary for the user to free the data that was provided as input to nc_put_att(). ## nc_get_att Suppose one is reading a vector of instances as the data of an attribute using, say, nc_get_att. Internally, the existing attribute data must be copied and returned to the caller, and the caller is responsible for reclaiming the returned data. Again, the code inside the netcdf library does only shallow copying rather than deep copy. So this can lead to memory leaks and errors because the deep data is shared between the library and the user. # Solution The solution is to build properly recursive reclaim and copy functions and use those as needed. 
These recursive functions are defined in libdispatch/dinstance.c and their signatures are defined in include/netcdf.h. For back compatibility, corresponding "ncaux_XXX" functions are defined in include/netcdf_aux.h. ```` int nc_reclaim_data(int ncid, nc_type xtypeid, void* memory, size_t count); int nc_reclaim_data_all(int ncid, nc_type xtypeid, void* memory, size_t count); int nc_copy_data(int ncid, nc_type xtypeid, const void* memory, size_t count, void* copy); int nc_copy_data_all(int ncid, nc_type xtypeid, const void* memory, size_t count, void** copyp); ```` There are two variants. The first two, nc_reclaim_data() and nc_copy_data(), assume the top-level vector is managed by the caller. For reclaim, this is so the user can use, for example, a statically allocated vector. For copy, it assumes the user provides the space into which the copy is stored. The second two, nc_reclaim_data_all() and nc_copy_data_all(), allows the functions to manage the top-level. So for nc_reclaim_data_all, the top level is assumed to be dynamically allocated and will be free'd by nc_reclaim_data_all(). The nc_copy_data_all() function will allocate the top level and return a pointer to it to the user. The user can later pass that pointer to nc_reclaim_data_all() to reclaim the instance(s). # Internal Changes The netcdf-c library internals are changed to use the proper reclaim and copy functions. It turns out that the places where these functions are needed is quite pervasive in the netcdf-c library code. Using these functions also allows some simplification of the code since the stdata and vldata fields of NC_ATT_INFO are no longer needed. Currently this is commented out using the SEPDATA \#define macro. When any bugs are largely fixed, all this code will be removed. # Known Bugs 1. There is still one known failure that has not been solved. All the failures revolve around some variant of this .cdl file. The proximate cause of failure is the use of a VLEN FillValue. 
```` netcdf x { types: float(*) row_of_floats ; dimensions: m = 5 ; variables: row_of_floats ragged_array(m) ; row_of_floats ragged_array:_FillValue = {-999} ; data: ragged_array = {10, 11, 12, 13, 14}, {20, 21, 22, 23}, {30, 31, 32}, {40, 41}, _ ; } ```` When a solution is found, I will either add it to this PR or post a new PR. # Related Changes * Mark nc_free_vlen(s) as deprecated in favor of ncaux_reclaim_data. * Remove the --enable-unfixed-memory-leaks option. * Remove the NC_VLENS_NOTEST code that suppresses some vlen tests. * Document this change in docs/internal.md * Disable the tst_vlen_data test in ncdump/tst_nccopy4.sh. * Mark types as fixed size or not (transitively) to optimize the reclaim and copy functions. # Misc. Changes * Make Doxygen process libdispatch/daux.c * Make sure the NC_ATT_INFO_T.container field is set.
2022-01-09 09:30:00 +08:00
nc_reclaim_data_all(ncid,var.tinfo->tid,var.fillvalp,1);
Fix more memory leaks in netcdf-c library This is a follow up to PR https://github.com/Unidata/netcdf-c/pull/1173 Sorry that it is so big, but leak suppression can be complex. This PR fixes all remaining memory leaks -- as determined by -fsanitize=address, and with the exceptions noted below. Unfortunately. there remains a significant leak that I cannot solve. It involves vlens, and it is unclear if the leak is occurring in the netcdf-c library or the HDF5 library. I have added a check_PROGRAM to the ncdump directory to show the problem. The program is called tst_vlen_demo.c To exercise it, build the netcdf library with -fsanitize=address enabled. Then go into ncdump and do a "make clean check". This should build tst_vlen_demo without actually executing it. Then do the command "./tst_vlen_demo" to see the output of the memory checker. Note the the lost malloc is deep in the HDF5 library (in H5Tvlen.c). I am temporarily working around this error in the following way. 1. I modified several test scripts to not execute known vlen tests that fail as described above. 2. Added an environment variable called NC_VLEN_NOTEST. If set, then those specific tests are suppressed. This should mean that the --disable-utilities option to ./configure should not need to be set to get a memory leak clean build. This should allow for detection of any new leaks. Note: I used an environment variable rather than a ./configure option to control the vlen tests. This is because it is temporary (I hope) and because it is a bit tricky for shell scripts to access ./configure options. Finally, as before, this only been tested with netcdf-4 and hdf5 support.
2018-11-16 01:00:38 +08:00
var.fillvalp = NULL;
}
2010-06-03 21:24:43 +08:00
if(var.timeinfo != NULL) {
if(var.timeinfo->units) free(var.timeinfo->units);
free(var.timeinfo);
}
if (dims)
free(dims);
if (vlist)
freeidlist(vlist);
2010-06-03 21:24:43 +08:00
}
static void
do_ncdump(int ncid, const char *path)
2010-06-03 21:24:43 +08:00
{
char* esc_specname;
/* output initial line */
indent_init();
indent_out();
esc_specname=escaped_name(formatting_specs.name);
2010-06-03 21:24:43 +08:00
printf ("netcdf %s {\n", esc_specname);
free(esc_specname);
do_ncdump_rec(ncid, path);
2010-06-03 21:24:43 +08:00
indent_out();
printf ("}\n");
}
static void
do_ncdumpx(int ncid, const char *path)
2010-06-03 21:24:43 +08:00
{
int ndims; /* number of dimensions */
int nvars; /* number of variables */
int ngatts; /* number of global attributes */
int xdimid; /* id of unlimited dimension */
int dimid; /* dimension id */
int varid; /* variable id */
ncdim_t *dims; /* dimensions */
ncvar_t var; /* variable */
int ia; /* attribute number */
int iv; /* variable number */
idnode_t* vlist = NULL; /* list for vars specified with -v option */
2010-06-03 21:24:43 +08:00
/*
* If any vars were specified with -v option, get list of associated
* variable ids
*/
if (formatting_specs.nlvars > 0) {
2011-09-22 07:10:03 +08:00
vlist = newidlist(); /* list for vars specified with -v option */
for (iv=0; iv < formatting_specs.nlvars; iv++) {
NC_CHECK( nc_inq_varid(ncid, formatting_specs.lvars[iv], &varid) );
2011-09-22 07:10:03 +08:00
idadd(vlist, varid);
2010-06-03 21:24:43 +08:00
}
}
/* output initial line */
pr_initx(ncid, path);
/*
* get number of dimensions, number of variables, number of global
* atts, and dimension id of unlimited dimension, if any
*/
/* TODO: print names with XML-ish escapes fopr special chars */
NC_CHECK( nc_inq(ncid, &ndims, &nvars, &ngatts, &xdimid) );
/* get dimension info */
dims = (ncdim_t *) emalloc((ndims + 1) * sizeof(ncdim_t));
for (dimid = 0; dimid < ndims; dimid++) {
NC_CHECK( nc_inq_dim(ncid, dimid, dims[dimid].name, &dims[dimid].size) );
if (dimid == xdimid)
printf(" <dimension name=\"%s\" length=\"%d\" isUnlimited=\"true\" />\n",
2010-06-03 21:24:43 +08:00
dims[dimid].name, (int)dims[dimid].size);
else
printf (" <dimension name=\"%s\" length=\"%d\" />\n",
2010-06-03 21:24:43 +08:00
dims[dimid].name, (int)dims[dimid].size);
}
/* get global attributes */
for (ia = 0; ia < ngatts; ia++)
pr_attx(ncid, NC_GLOBAL, ia); /* print ia-th global attribute */
/* get variable info, with variable attributes */
memset((void*)&var,0,sizeof(var));
for (varid = 0; varid < nvars; varid++) {
NC_CHECK( nc_inq_varndims(ncid, varid, &var.ndims) );
if(var.dims != NULL) free(var.dims);
var.dims = (int *) emalloc((var.ndims + 1) * sizeof(int));
NC_CHECK( nc_inq_var(ncid, varid, var.name, &var.type, 0,
var.dims, &var.natts) );
printf (" <variable name=\"%s\"", var.name);
pr_shape(&var, dims);
/* handle one-line variable elements that aren't containers
for attributes or data values, since they need to be
rendered as <variable ... /> instead of <variable ..>
... </variable> */
if (var.natts == 0) {
if (
/* header-only specified */
(formatting_specs.header_only) ||
2010-06-03 21:24:43 +08:00
/* list of variables specified and this variable not in list */
2011-09-22 07:10:03 +08:00
(formatting_specs.nlvars > 0 && !idmember(vlist, varid)) ||
2010-06-03 21:24:43 +08:00
/* coordinate vars only and this is not a coordinate variable */
(formatting_specs.coord_vals && !iscoordvar(ncid, varid)) ||
2010-06-03 21:24:43 +08:00
/* this is a record variable, but no records have been written */
(isrecvar(ncid,varid) && dims[xdimid].size == 0)
) {
printf (" type=\"%s\" />\n", prim_type_name(var.type));
continue;
}
}
/* else nest attributes values, data values in <variable> ... </variable> */
printf (" type=\"%s\">\n", prim_type_name(var.type));
/* get variable attributes */
for (ia = 0; ia < var.natts; ia++) {
pr_attx(ncid, varid, ia); /* print ia-th attribute */
}
printf (" </variable>\n");
}
2010-06-03 21:24:43 +08:00
printf ("</netcdf>\n");
if (vlist)
freeidlist(vlist);
2010-06-03 21:24:43 +08:00
if(dims)
free(dims);
if(var.dims != NULL)
free(var.dims);
2010-06-03 21:24:43 +08:00
}
/*
 * Extract the significant-digits specifiers from the (deprecated and
 * undocumented) -d argument on the command-line and update the
 * default data formats appropriately.  This only exists because an
 * old version of ncdump supported the "-d" flag which did not
 * override the C_format attributes (if any).
 *
 * optarg has the form "float_digits[,double_digits]".  A leading comma
 * (",double_digits") leaves the float digits at FLT_DIGITS and sets only
 * the double digits; previously that form skipped the double-digits
 * parse entirely, so the value was silently ignored.
 *
 * Each value must lie in 1..20; anything else is reported through
 * error().  The resulting pair is handed to set_formats().
 */
static void
set_sigdigs(const char *optarg)
{
    const char *dbl_spec = NULL; /* where a ",double_digits" part may start */
    char *endp = NULL;           /* first char strtol() did not consume */
    int flt_digits = FLT_DIGITS; /* default floating-point digits */
    int dbl_digits = DBL_DIGITS; /* default double-precision digits */

    if (optarg != NULL && optarg[0] != '\0') {
        if (optarg[0] == ',') {
            /* no float part: the double part starts at the comma itself */
            dbl_spec = optarg;
        } else {
            flt_digits = (int)strtol(optarg, &endp, 10);
            dbl_spec = endp;
        }
    }

    if (flt_digits < 1 || flt_digits > 20) {
        error("unreasonable value for float significant digits: %d",
              flt_digits);
    }

    if (dbl_spec != NULL && *dbl_spec == ',') {
        const char *digits = dbl_spec + 1;

        endp = NULL;
        dbl_digits = (int)strtol(digits, &endp, 10);
        /* endp == digits means no number followed the comma */
        if (endp == digits || dbl_digits < 1 || dbl_digits > 20) {
            error("unreasonable value for double significant digits: %d",
                  dbl_digits);
        }
    }

    set_formats(flt_digits, dbl_digits);
}
/*
 * Extract the significant-digits specifiers from the -p argument on the
 * command-line, set flags so we can override C_format attributes (if any),
 * and update the default data formats appropriately.
 *
 * optarg has the form "float_digits[,double_digits]".  A leading comma
 * (",double_digits") leaves the float digits at FLT_DIGITS and sets only
 * the double digits; previously that form skipped the double-digits
 * parse entirely, so neither the value nor double_precision_specified
 * was ever set.
 *
 * float_precision_specified is set when a float part is present and
 * double_precision_specified when a double part is present.  Each value
 * must lie in 1..20; anything else is reported through error().  The
 * resulting pair is handed to set_formats().
 */
static void
set_precision(const char *optarg)
{
    const char *dbl_spec = NULL; /* where a ",double_digits" part may start */
    char *endp = NULL;           /* first char strtol() did not consume */
    int flt_digits = FLT_DIGITS; /* default floating-point digits */
    int dbl_digits = DBL_DIGITS; /* default double-precision digits */

    if (optarg != NULL && optarg[0] != '\0') {
        if (optarg[0] == ',') {
            /* no float part: the double part starts at the comma itself */
            dbl_spec = optarg;
        } else {
            flt_digits = (int)strtol(optarg, &endp, 10);
            float_precision_specified = 1;
            dbl_spec = endp;
        }
    }

    if (flt_digits < 1 || flt_digits > 20) {
        error("unreasonable value for float significant digits: %d",
              flt_digits);
    }

    if (dbl_spec != NULL && *dbl_spec == ',') {
        const char *digits = dbl_spec + 1;

        endp = NULL;
        dbl_digits = (int) strtol(digits, &endp, 10);
        double_precision_specified = 1;
        /* endp == digits means no number followed the comma */
        if (endp == digits || dbl_digits < 1 || dbl_digits > 20) {
            error("unreasonable value for double significant digits: %d",
                  dbl_digits);
        }
    }

    set_formats(flt_digits, dbl_digits);
}
2011-08-26 11:54:25 +08:00
#ifdef USE_DAP
This PR adds EXPERIMENTAL support for accessing data in the cloud using a variant of the Zarr protocol and storage format. This enhancement is generically referred to as "NCZarr". The data model supported by NCZarr is netcdf-4 minus the user-defined types and the String type. In this sense it is similar to the CDF-5 data model. More detailed information about enabling and using NCZarr is described in the document NUG/nczarr.md and in a [Unidata Developer's blog entry](https://www.unidata.ucar.edu/blogs/developer/en/entry/overview-of-zarr-support-in). WARNING: this code has had limited testing, so do use this version for production work. Also, performance improvements are ongoing. Note especially the following platform matrix of successful tests: Platform | Build System | S3 support ------------------------------------ Linux+gcc | Automake | yes Linux+gcc | CMake | yes Visual Studio | CMake | no Additionally, and as a consequence of the addition of NCZarr, major changes have been made to the Filter API. NOTE: NCZarr does not yet support filters, but these changes are enablers for that support in the future. Note that it is possible (probable?) that there will be some accidental reversions if the changes here did not correctly mimic the existing filter testing. In any case, previously filter ids and parameters were of type unsigned int. In order to support the more general zarr filter model, this was all converted to char*. The old HDF5-specific, unsigned int operations are still supported but they are wrappers around the new, char* based nc_filterx_XXX functions. This entailed at least the following changes: 1. Added the files libdispatch/dfilterx.c and include/ncfilter.h 2. Some filterx utilities have been moved to libdispatch/daux.c 3. A new entry, "filter_actions" was added to the NCDispatch table and the version bumped. 4. 
An overly complex set of structs was created to support funnelling all of the filterx operations thru a single dispatch "filter_actions" entry. 5. Move common code to from libhdf5 to libsrc4 so that it is accessible to nczarr. Changes directly related to Zarr: 1. Modified CMakeList.txt and configure.ac to support both C and C++ -- this is in support of S3 support via the awd-sdk libraries. 2. Define a size64_t type to support nczarr. 3. More reworking of libdispatch/dinfermodel.c to support zarr and to regularize the structure of the fragments section of a URL. Changes not directly related to Zarr: 1. Make client-side filter registration be conditional, with default off. 2. Hack include/nc4internal.h to make some flags added by Ed be unique: e.g. NC_CREAT, NC_INDEF, etc. 3. cleanup include/nchttp.h and libdispatch/dhttp.c. 4. Misc. changes to support compiling under Visual Studio including: * Better testing under windows for dirent.h and opendir and closedir. 5. Misc. changes to the oc2 code to support various libcurl CURLOPT flags and to centralize error reporting. 6. By default, suppress the vlen tests that have unfixed memory leaks; add option to enable them. 7. Make part of the nc_test/test_byterange.sh test be contingent on remotetest.unidata.ucar.edu being accessible. Changes Left TO-DO: 1. fix provenance code, it is too HDF5 specific.
2020-06-29 08:02:47 +08:00
#define DAP_CLIENT_CACHE_DIRECTIVE "cache"
2010-06-03 21:24:43 +08:00
/* replace path string with same string prefixed by
* DAP_CLIENT_NCDUMP_DIRECTIVE */
This PR adds EXPERIMENTAL support for accessing data in the cloud using a variant of the Zarr protocol and storage format. This enhancement is generically referred to as "NCZarr". The data model supported by NCZarr is netcdf-4 minus the user-defined types and the String type. In this sense it is similar to the CDF-5 data model. More detailed information about enabling and using NCZarr is described in the document NUG/nczarr.md and in a [Unidata Developer's blog entry](https://www.unidata.ucar.edu/blogs/developer/en/entry/overview-of-zarr-support-in). WARNING: this code has had limited testing, so do use this version for production work. Also, performance improvements are ongoing. Note especially the following platform matrix of successful tests: Platform | Build System | S3 support ------------------------------------ Linux+gcc | Automake | yes Linux+gcc | CMake | yes Visual Studio | CMake | no Additionally, and as a consequence of the addition of NCZarr, major changes have been made to the Filter API. NOTE: NCZarr does not yet support filters, but these changes are enablers for that support in the future. Note that it is possible (probable?) that there will be some accidental reversions if the changes here did not correctly mimic the existing filter testing. In any case, previously filter ids and parameters were of type unsigned int. In order to support the more general zarr filter model, this was all converted to char*. The old HDF5-specific, unsigned int operations are still supported but they are wrappers around the new, char* based nc_filterx_XXX functions. This entailed at least the following changes: 1. Added the files libdispatch/dfilterx.c and include/ncfilter.h 2. Some filterx utilities have been moved to libdispatch/daux.c 3. A new entry, "filter_actions" was added to the NCDispatch table and the version bumped. 4. 
An overly complex set of structs was created to support funnelling all of the filterx operations thru a single dispatch "filter_actions" entry. 5. Move common code to from libhdf5 to libsrc4 so that it is accessible to nczarr. Changes directly related to Zarr: 1. Modified CMakeList.txt and configure.ac to support both C and C++ -- this is in support of S3 support via the awd-sdk libraries. 2. Define a size64_t type to support nczarr. 3. More reworking of libdispatch/dinfermodel.c to support zarr and to regularize the structure of the fragments section of a URL. Changes not directly related to Zarr: 1. Make client-side filter registration be conditional, with default off. 2. Hack include/nc4internal.h to make some flags added by Ed be unique: e.g. NC_CREAT, NC_INDEF, etc. 3. cleanup include/nchttp.h and libdispatch/dhttp.c. 4. Misc. changes to support compiling under Visual Studio including: * Better testing under windows for dirent.h and opendir and closedir. 5. Misc. changes to the oc2 code to support various libcurl CURLOPT flags and to centralize error reporting. 6. By default, suppress the vlen tests that have unfixed memory leaks; add option to enable them. 7. Make part of the nc_test/test_byterange.sh test be contingent on remotetest.unidata.ucar.edu being accessible. Changes Left TO-DO: 1. fix provenance code, it is too HDF5 specific.
2020-06-29 08:02:47 +08:00
static void
adapt_url_for_cache(char **pathp)
{
2010-06-03 21:24:43 +08:00
char* path = *pathp;
This PR adds EXPERIMENTAL support for accessing data in the cloud using a variant of the Zarr protocol and storage format. This enhancement is generically referred to as "NCZarr". The data model supported by NCZarr is netcdf-4 minus the user-defined types and the String type. In this sense it is similar to the CDF-5 data model. More detailed information about enabling and using NCZarr is described in the document NUG/nczarr.md and in a [Unidata Developer's blog entry](https://www.unidata.ucar.edu/blogs/developer/en/entry/overview-of-zarr-support-in). WARNING: this code has had limited testing, so do use this version for production work. Also, performance improvements are ongoing. Note especially the following platform matrix of successful tests: Platform | Build System | S3 support ------------------------------------ Linux+gcc | Automake | yes Linux+gcc | CMake | yes Visual Studio | CMake | no Additionally, and as a consequence of the addition of NCZarr, major changes have been made to the Filter API. NOTE: NCZarr does not yet support filters, but these changes are enablers for that support in the future. Note that it is possible (probable?) that there will be some accidental reversions if the changes here did not correctly mimic the existing filter testing. In any case, previously filter ids and parameters were of type unsigned int. In order to support the more general zarr filter model, this was all converted to char*. The old HDF5-specific, unsigned int operations are still supported but they are wrappers around the new, char* based nc_filterx_XXX functions. This entailed at least the following changes: 1. Added the files libdispatch/dfilterx.c and include/ncfilter.h 2. Some filterx utilities have been moved to libdispatch/daux.c 3. A new entry, "filter_actions" was added to the NCDispatch table and the version bumped. 4. 
An overly complex set of structs was created to support funnelling all of the filterx operations thru a single dispatch "filter_actions" entry. 5. Move common code to from libhdf5 to libsrc4 so that it is accessible to nczarr. Changes directly related to Zarr: 1. Modified CMakeList.txt and configure.ac to support both C and C++ -- this is in support of S3 support via the awd-sdk libraries. 2. Define a size64_t type to support nczarr. 3. More reworking of libdispatch/dinfermodel.c to support zarr and to regularize the structure of the fragments section of a URL. Changes not directly related to Zarr: 1. Make client-side filter registration be conditional, with default off. 2. Hack include/nc4internal.h to make some flags added by Ed be unique: e.g. NC_CREAT, NC_INDEF, etc. 3. cleanup include/nchttp.h and libdispatch/dhttp.c. 4. Misc. changes to support compiling under Visual Studio including: * Better testing under windows for dirent.h and opendir and closedir. 5. Misc. changes to the oc2 code to support various libcurl CURLOPT flags and to centralize error reporting. 6. By default, suppress the vlen tests that have unfixed memory leaks; add option to enable them. 7. Make part of the nc_test/test_byterange.sh test be contingent on remotetest.unidata.ucar.edu being accessible. Changes Left TO-DO: 1. fix provenance code, it is too HDF5 specific.
2020-06-29 08:02:47 +08:00
NCURI* url = NULL;
ncuriparse(path,&url);
if(url == NULL) return;
ncuriappendfragmentkey(url,DAP_CLIENT_CACHE_DIRECTIVE,NULL);
if(*pathp) free(*pathp);
path = ncuribuild(url,NULL,NULL,NCURIALL);
if(pathp) {*pathp = path; path = NULL;}
ncurifree(url);
nullfree(path);
2010-06-03 21:24:43 +08:00
return;
}
#endif
2010-06-03 21:24:43 +08:00
int
main(int argc, char *argv[])
{
int ncstat = NC_NOERR;
2010-06-03 21:24:43 +08:00
int c;
int i;
int max_len = 80; /* default maximum line length */
int nameopt = 0;
bool_t xml_out = false; /* if true, output NcML instead of CDL */
bool_t kind_out = false; /* if true, just output kind of netCDF file */
bool_t kind_out_extended = false; /* output inq_format vs inq_format_extended */
int Xp_flag = 0; /* indicate that -Xp flag was set */
char* path = NULL;
char errmsg[4096];
errmsg[0] = '\0';
2010-06-03 21:24:43 +08:00
#if defined(_WIN32) || defined(msdos) || defined(WIN64)
putenv("PRINTF_EXPONENT_DIGITS=2"); /* Enforce unix/linux style exponent formatting. */
#endif
2010-06-03 21:24:43 +08:00
progname = argv[0];
set_formats(FLT_DIGITS, DBL_DIGITS); /* default for float, double data */
/* If the user called ncdump without arguments, print the usage
* message and return peacefully. */
if (argc <= 1)
{
usage();
exit(EXIT_SUCCESS);
2010-06-03 21:24:43 +08:00
}
opterr = 1;
while ((c = getopt(argc, argv, "b:cd:f:g:hikl:n:p:stv:xwKL:X:")) != EOF)
2010-06-03 21:24:43 +08:00
switch(c) {
case 'h': /* dump header only, no data */
formatting_specs.header_only = true;
2010-06-03 21:24:43 +08:00
break;
case 'c': /* header, data only for coordinate dims */
formatting_specs.coord_vals = true;
2010-06-03 21:24:43 +08:00
break;
case 'n': /*
* provide different name than derived from
* file name
*/
formatting_specs.name = optarg;
2010-06-03 21:24:43 +08:00
nameopt = 1;
break;
case 'b': /* brief comments in data section */
formatting_specs.brief_data_cmnts = true;
switch (tolower((int)optarg[0])) {
2010-06-03 21:24:43 +08:00
case 'c':
formatting_specs.data_lang = LANG_C;
2010-06-03 21:24:43 +08:00
break;
case 'f':
formatting_specs.data_lang = LANG_F;
2010-06-03 21:24:43 +08:00
break;
default:
snprintf(errmsg,sizeof(errmsg),"invalid value for -b option: %s", optarg);
goto fail;
2010-06-03 21:24:43 +08:00
}
break;
case 'f': /* full comments in data section */
formatting_specs.full_data_cmnts = true;
switch (tolower((int)optarg[0])) {
2010-06-03 21:24:43 +08:00
case 'c':
formatting_specs.data_lang = LANG_C;
2010-06-03 21:24:43 +08:00
break;
case 'f':
formatting_specs.data_lang = LANG_F;
2010-06-03 21:24:43 +08:00
break;
default:
snprintf(errmsg,sizeof(errmsg),"invalid value for -f option: %s", optarg);
goto fail;
2010-06-03 21:24:43 +08:00
}
break;
case 'l': /* maximum line length */
max_len = (int) strtol(optarg, 0, 0);
if (max_len < 10) {
snprintf(errmsg,sizeof(errmsg),"unreasonably small line length specified: %d", max_len);
goto fail;
2010-06-03 21:24:43 +08:00
}
break;
case 'v': /* variable names */
/* make list of names of variables specified */
make_lvars (optarg, &formatting_specs.nlvars, &formatting_specs.lvars);
2010-06-03 21:24:43 +08:00
break;
2011-08-26 11:54:25 +08:00
case 'g': /* group names */
/* make list of names of groups specified */
make_lgrps (optarg, &formatting_specs.nlgrps, &formatting_specs.lgrps,
&formatting_specs.grpids);
2011-08-26 11:54:25 +08:00
break;
2010-06-03 21:24:43 +08:00
case 'd': /* specify precision for floats (deprecated, undocumented) */
set_sigdigs(optarg);
break;
case 'p': /* specify precision for floats, overrides attribute specs */
set_precision(optarg);
break;
case 'x': /* XML output (NcML) */
xml_out = true;
break;
case 'k': /* just output what kind of netCDF file */
kind_out = true;
break;
case 'K': /* extended format info */
kind_out_extended = true;
break;
case 't': /* human-readable strings for date-time values */
formatting_specs.string_times = true;
formatting_specs.iso_separator = false;
break;
case 'i': /* human-readable strings for data-time values with 'T' separator */
formatting_specs.string_times = true;
formatting_specs.iso_separator = true;
2010-06-03 21:24:43 +08:00
break;
case 's': /* output special (virtual) attributes for
* netCDF-4 files and variables, including
* _DeflateLevel, _Chunking, _Endianness,
* _Format, _Checksum, _NoFill */
formatting_specs.special_atts = true;
2010-06-03 21:24:43 +08:00
break;
case 'w': /* with client-side cache for DAP URLs */
formatting_specs.with_cache = true;
2010-06-03 21:24:43 +08:00
break;
case 'X': /* special options */
switch (tolower((int)optarg[0])) {
case 'm':
formatting_specs.xopt_inmemory = 1;
break;
case 'p': /* suppress the properties attribute */
Xp_flag = 1; /* record that this flag was set */
break;
default:
snprintf(errmsg,sizeof(errmsg),"invalid value for -X option: %s", optarg);
goto fail;
}
break;
case 'L':
#ifdef LOGGING
{
int level = atoi(optarg);
if(level >= 0)
nc_set_log_level(level);
}
#endif
ncsetlogging(1);
break;
2010-06-03 21:24:43 +08:00
case '?':
usage();
exit(EXIT_FAILURE);
2010-06-03 21:24:43 +08:00
}
/* Decide xopt_props */
if(formatting_specs.special_atts && Xp_flag == 1)
formatting_specs.xopt_props = 0;
else if(formatting_specs.special_atts && Xp_flag == 0)
formatting_specs.xopt_props = 1;
else if(!formatting_specs.special_atts)
formatting_specs.xopt_props = 0;
else
formatting_specs.xopt_props = 0;
2010-06-03 21:24:43 +08:00
set_max_len(max_len);
2010-06-03 21:24:43 +08:00
argc -= optind;
argv += optind;
/* If no file arguments left or more than one, print usage message. */
if (argc != 1)
{
usage();
exit(EXIT_FAILURE);
2010-06-03 21:24:43 +08:00
}
i = 0;
init_epsilons();
Codify cross-platform file paths The netcdf-c code has to deal with a variety of platforms: Windows, OSX, Linux, Cygwin, MSYS, etc. These platforms differ significantly in the kind of file paths that they accept. So in order to handle this, I have created a set of replacements for the most common file system operations such as _open_ or _fopen_ or _access_ to manage the file path differences correctly. A more limited version of this idea was already implemented via the ncwinpath.h and dwinpath.c code. So this can be viewed as a replacement for that code. And in many cases, the only change that was required was to replace '#include <ncwinpath.h>' with '#include <ncpathmgt.h>' and then replace file operation calls with the NCxxx equivalent from ncpathmgr.h. Note that recently, the ncwinpath.h was renamed ncpathmgmt.h, so this pull request should not require dealing with winpath. The heart of the change is include/ncpathmgmt.h, which provides alternate operations such as NCfopen or NCaccess and which properly parse and rebuild path arguments to work for the platform on which the code is executing. This mostly matters for Windows because of the way that it uses backslash and drive letters, as compared to *nix*. One important feature is that the user can do string manipulations on a file path without having to worry too much about the platform because the path management code will properly handle most mixed cases. So one can for example concatenate a path suffix that uses forward slashes to a Windows path and have it work correctly. The conversion code is in libdispatch/dpathmgr.c, and the important function there is NCpathcvt which does the proper conversions to the local path format. As a rule, most code should just replace their file operations with the corresponding NCxxx ones defined in include/ncpathmgmt.h. These NCxxx functions all call NCpathcvt on their path arguments before executing the actual file operation. 
In some rare cases, the client may need to directly use NCpathcvt, but this should be avoided as much as possible. If there is a need for supporting a new file operation not already in ncpathmgmt.h, then use the code in dpathmgr.c as a template. Also please notify Unidata so we can include it as a formal part of our supported operations. Also, if you see an operation in the library that is not using the NCxxx form, then please submit an issue so we can fix it. Misc. Changes: * Clean up the utf8 testing code; it is impossible to get some tests to work under windows using shell scripts; the args do not pass as utf8 but as some other encoding. * Added an extra utf8 test case: test_unicode_path.sh * Add a true test for HDF5 1.10.6 or later because as noted in PR https://github.com/Unidata/netcdf-c/pull/1794, HDF5 changed its Windows file path handling.
2021-03-05 04:41:31 +08:00
/* We need to look for escape characters because the argument
may have come in via a shell script */
path = NC_shellUnescape(argv[i]);
This PR adds EXPERIMENTAL support for accessing data in the cloud using a variant of the Zarr protocol and storage format. This enhancement is generically referred to as "NCZarr". The data model supported by NCZarr is netcdf-4 minus the user-defined types and the String type. In this sense it is similar to the CDF-5 data model. More detailed information about enabling and using NCZarr is described in the document NUG/nczarr.md and in a [Unidata Developer's blog entry](https://www.unidata.ucar.edu/blogs/developer/en/entry/overview-of-zarr-support-in). WARNING: this code has had limited testing, so do not use this version for production work. Also, performance improvements are ongoing. Note especially the following platform matrix of successful tests: Platform | Build System | S3 support ------------------------------------ Linux+gcc | Automake | yes Linux+gcc | CMake | yes Visual Studio | CMake | no Additionally, and as a consequence of the addition of NCZarr, major changes have been made to the Filter API. NOTE: NCZarr does not yet support filters, but these changes are enablers for that support in the future. Note that it is possible (probable?) that there will be some accidental reversions if the changes here did not correctly mimic the existing filter testing. In any case, previously filter ids and parameters were of type unsigned int. In order to support the more general zarr filter model, this was all converted to char*. The old HDF5-specific, unsigned int operations are still supported but they are wrappers around the new, char* based nc_filterx_XXX functions. This entailed at least the following changes: 1. Added the files libdispatch/dfilterx.c and include/ncfilter.h 2. Some filterx utilities have been moved to libdispatch/daux.c 3. A new entry, "filter_actions" was added to the NCDispatch table and the version bumped. 4. 
An overly complex set of structs was created to support funnelling all of the filterx operations thru a single dispatch "filter_actions" entry. 5. Move common code from libhdf5 to libsrc4 so that it is accessible to nczarr. Changes directly related to Zarr: 1. Modified CMakeLists.txt and configure.ac to support both C and C++ -- this is in support of S3 support via the aws-sdk libraries. 2. Define a size64_t type to support nczarr. 3. More reworking of libdispatch/dinfermodel.c to support zarr and to regularize the structure of the fragments section of a URL. Changes not directly related to Zarr: 1. Make client-side filter registration be conditional, with default off. 2. Hack include/nc4internal.h to make some flags added by Ed be unique: e.g. NC_CREAT, NC_INDEF, etc. 3. cleanup include/nchttp.h and libdispatch/dhttp.c. 4. Misc. changes to support compiling under Visual Studio including: * Better testing under windows for dirent.h and opendir and closedir. 5. Misc. changes to the oc2 code to support various libcurl CURLOPT flags and to centralize error reporting. 6. By default, suppress the vlen tests that have unfixed memory leaks; add option to enable them. 7. Make part of the nc_test/test_byterange.sh test be contingent on remotetest.unidata.ucar.edu being accessible. Changes Left TO-DO: 1. fix provenance code, it is too HDF5 specific.
2020-06-29 08:02:47 +08:00
if(path == NULL) {
Codify cross-platform file paths The netcdf-c code has to deal with a variety of platforms: Windows, OSX, Linux, Cygwin, MSYS, etc. These platforms differ significantly in the kind of file paths that they accept. So in order to handle this, I have created a set of replacements for the most common file system operations such as _open_ or _fopen_ or _access_ to manage the file path differences correctly. A more limited version of this idea was already implemented via the ncwinpath.h and dwinpath.c code. So this can be viewed as a replacement for that code. And in path in many cases, the only change that was required was to replace '#include <ncwinpath.h>' with '#include <ncpathmgt.h>' and then replace file operation calls with the NCxxx equivalent from ncpathmgr.h Note that recently, the ncwinpath.h was renamed ncpathmgmt.h, so this pull request should not require dealing with winpath. The heart of the change is include/ncpathmgmt.h, which provides alternate operations such as NCfopen or NCaccess and which properly parse and rebuild path arguments to work for the platform on which the code is executing. This mostly matters for Windows because of the way that it uses backslash and drive letters, as compared to *nix*. One important feature is that the user can do string manipulations on a file path without having to worry too much about the platform because the path management code will properly handle most mixed cases. So one can for example concatenate a path suffix that uses forward slashes to a Windows path and have it work correctly. The conversion code is in libdispatch/dpathmgr.c, and the important function there is NCpathcvt which does the proper conversions to the local path format. As a rule, most code should just replace their file operations with the corresponding NCxxx ones defined in include/ncpathmgmt.h. These NCxxx functions all call NCpathcvt on their path arguments before executing the actual file operation. 
In some rare cases, the client may need to directly use NCpathcvt, but this should be avoided as much as possible. If there is a need for supporting a new file operation not already in ncpathmgmt.h, then use the code in dpathmgr.c as a template. Also please notify Unidata so we can include it as a formal part or our supported operations. Also, if you see an operation in the library that is not using the NCxxx form, then please submit an issue so we can fix it. Misc. Changes: * Clean up the utf8 testing code; it is impossible to get some tests to work under windows using shell scripts; the args do not pass as utf8 but as some other encoding. * Added an extra utf8 test case: test_unicode_path.sh * Add a true test for HDF5 1.10.6 or later because as noted in PR https://github.com/Unidata/netcdf-c/pull/1794, HDF5 changed its Windows file path handling.
2021-03-05 04:41:31 +08:00
snprintf(errmsg,sizeof(errmsg),"out of memory un-escaping argument %s", argv[i]);
goto fail;
}
This PR adds EXPERIMENTAL support for accessing data in the cloud using a variant of the Zarr protocol and storage format. This enhancement is generically referred to as "NCZarr". The data model supported by NCZarr is netcdf-4 minus the user-defined types and the String type. In this sense it is similar to the CDF-5 data model. More detailed information about enabling and using NCZarr is described in the document NUG/nczarr.md and in a [Unidata Developer's blog entry](https://www.unidata.ucar.edu/blogs/developer/en/entry/overview-of-zarr-support-in). WARNING: this code has had limited testing, so do use this version for production work. Also, performance improvements are ongoing. Note especially the following platform matrix of successful tests: Platform | Build System | S3 support ------------------------------------ Linux+gcc | Automake | yes Linux+gcc | CMake | yes Visual Studio | CMake | no Additionally, and as a consequence of the addition of NCZarr, major changes have been made to the Filter API. NOTE: NCZarr does not yet support filters, but these changes are enablers for that support in the future. Note that it is possible (probable?) that there will be some accidental reversions if the changes here did not correctly mimic the existing filter testing. In any case, previously filter ids and parameters were of type unsigned int. In order to support the more general zarr filter model, this was all converted to char*. The old HDF5-specific, unsigned int operations are still supported but they are wrappers around the new, char* based nc_filterx_XXX functions. This entailed at least the following changes: 1. Added the files libdispatch/dfilterx.c and include/ncfilter.h 2. Some filterx utilities have been moved to libdispatch/daux.c 3. A new entry, "filter_actions" was added to the NCDispatch table and the version bumped. 4. 
An overly complex set of structs was created to support funnelling all of the filterx operations thru a single dispatch "filter_actions" entry. 5. Move common code to from libhdf5 to libsrc4 so that it is accessible to nczarr. Changes directly related to Zarr: 1. Modified CMakeList.txt and configure.ac to support both C and C++ -- this is in support of S3 support via the awd-sdk libraries. 2. Define a size64_t type to support nczarr. 3. More reworking of libdispatch/dinfermodel.c to support zarr and to regularize the structure of the fragments section of a URL. Changes not directly related to Zarr: 1. Make client-side filter registration be conditional, with default off. 2. Hack include/nc4internal.h to make some flags added by Ed be unique: e.g. NC_CREAT, NC_INDEF, etc. 3. cleanup include/nchttp.h and libdispatch/dhttp.c. 4. Misc. changes to support compiling under Visual Studio including: * Better testing under windows for dirent.h and opendir and closedir. 5. Misc. changes to the oc2 code to support various libcurl CURLOPT flags and to centralize error reporting. 6. By default, suppress the vlen tests that have unfixed memory leaks; add option to enable them. 7. Make part of the nc_test/test_byterange.sh test be contingent on remotetest.unidata.ucar.edu being accessible. Changes Left TO-DO: 1. fix provenance code, it is too HDF5 specific.
2020-06-29 08:02:47 +08:00
if (!nameopt)
formatting_specs.name = name_path(path);
if (argc > 0) {
int ncid;
This PR adds EXPERIMENTAL support for accessing data in the cloud using a variant of the Zarr protocol and storage format. This enhancement is generically referred to as "NCZarr". The data model supported by NCZarr is netcdf-4 minus the user-defined types and the String type. In this sense it is similar to the CDF-5 data model. More detailed information about enabling and using NCZarr is described in the document NUG/nczarr.md and in a [Unidata Developer's blog entry](https://www.unidata.ucar.edu/blogs/developer/en/entry/overview-of-zarr-support-in). WARNING: this code has had limited testing, so do use this version for production work. Also, performance improvements are ongoing. Note especially the following platform matrix of successful tests: Platform | Build System | S3 support ------------------------------------ Linux+gcc | Automake | yes Linux+gcc | CMake | yes Visual Studio | CMake | no Additionally, and as a consequence of the addition of NCZarr, major changes have been made to the Filter API. NOTE: NCZarr does not yet support filters, but these changes are enablers for that support in the future. Note that it is possible (probable?) that there will be some accidental reversions if the changes here did not correctly mimic the existing filter testing. In any case, previously filter ids and parameters were of type unsigned int. In order to support the more general zarr filter model, this was all converted to char*. The old HDF5-specific, unsigned int operations are still supported but they are wrappers around the new, char* based nc_filterx_XXX functions. This entailed at least the following changes: 1. Added the files libdispatch/dfilterx.c and include/ncfilter.h 2. Some filterx utilities have been moved to libdispatch/daux.c 3. A new entry, "filter_actions" was added to the NCDispatch table and the version bumped. 4. 
An overly complex set of structs was created to support funnelling all of the filterx operations thru a single dispatch "filter_actions" entry. 5. Move common code to from libhdf5 to libsrc4 so that it is accessible to nczarr. Changes directly related to Zarr: 1. Modified CMakeList.txt and configure.ac to support both C and C++ -- this is in support of S3 support via the awd-sdk libraries. 2. Define a size64_t type to support nczarr. 3. More reworking of libdispatch/dinfermodel.c to support zarr and to regularize the structure of the fragments section of a URL. Changes not directly related to Zarr: 1. Make client-side filter registration be conditional, with default off. 2. Hack include/nc4internal.h to make some flags added by Ed be unique: e.g. NC_CREAT, NC_INDEF, etc. 3. cleanup include/nchttp.h and libdispatch/dhttp.c. 4. Misc. changes to support compiling under Visual Studio including: * Better testing under windows for dirent.h and opendir and closedir. 5. Misc. changes to the oc2 code to support various libcurl CURLOPT flags and to centralize error reporting. 6. By default, suppress the vlen tests that have unfixed memory leaks; add option to enable them. 7. Make part of the nc_test/test_byterange.sh test be contingent on remotetest.unidata.ucar.edu being accessible. Changes Left TO-DO: 1. fix provenance code, it is too HDF5 specific.
2020-06-29 08:02:47 +08:00
/* If path is a URL, do some fixups */
if(nc__testurl(path, NULL)) {/* See if this is a url */
/* Prefix with client-side directive to
* make ncdump reasonably efficient */
2010-06-03 21:24:43 +08:00
#ifdef USE_DAP
if(formatting_specs.with_cache) { /* by default, don't use cache directive */
This PR adds EXPERIMENTAL support for accessing data in the cloud using a variant of the Zarr protocol and storage format. This enhancement is generically referred to as "NCZarr". The data model supported by NCZarr is netcdf-4 minus the user-defined types and the String type. In this sense it is similar to the CDF-5 data model. More detailed information about enabling and using NCZarr is described in the document NUG/nczarr.md and in a [Unidata Developer's blog entry](https://www.unidata.ucar.edu/blogs/developer/en/entry/overview-of-zarr-support-in). WARNING: this code has had limited testing, so do use this version for production work. Also, performance improvements are ongoing. Note especially the following platform matrix of successful tests: Platform | Build System | S3 support ------------------------------------ Linux+gcc | Automake | yes Linux+gcc | CMake | yes Visual Studio | CMake | no Additionally, and as a consequence of the addition of NCZarr, major changes have been made to the Filter API. NOTE: NCZarr does not yet support filters, but these changes are enablers for that support in the future. Note that it is possible (probable?) that there will be some accidental reversions if the changes here did not correctly mimic the existing filter testing. In any case, previously filter ids and parameters were of type unsigned int. In order to support the more general zarr filter model, this was all converted to char*. The old HDF5-specific, unsigned int operations are still supported but they are wrappers around the new, char* based nc_filterx_XXX functions. This entailed at least the following changes: 1. Added the files libdispatch/dfilterx.c and include/ncfilter.h 2. Some filterx utilities have been moved to libdispatch/daux.c 3. A new entry, "filter_actions" was added to the NCDispatch table and the version bumped. 4. 
An overly complex set of structs was created to support funnelling all of the filterx operations thru a single dispatch "filter_actions" entry. 5. Move common code to from libhdf5 to libsrc4 so that it is accessible to nczarr. Changes directly related to Zarr: 1. Modified CMakeList.txt and configure.ac to support both C and C++ -- this is in support of S3 support via the awd-sdk libraries. 2. Define a size64_t type to support nczarr. 3. More reworking of libdispatch/dinfermodel.c to support zarr and to regularize the structure of the fragments section of a URL. Changes not directly related to Zarr: 1. Make client-side filter registration be conditional, with default off. 2. Hack include/nc4internal.h to make some flags added by Ed be unique: e.g. NC_CREAT, NC_INDEF, etc. 3. cleanup include/nchttp.h and libdispatch/dhttp.c. 4. Misc. changes to support compiling under Visual Studio including: * Better testing under windows for dirent.h and opendir and closedir. 5. Misc. changes to the oc2 code to support various libcurl CURLOPT flags and to centralize error reporting. 6. By default, suppress the vlen tests that have unfixed memory leaks; add option to enable them. 7. Make part of the nc_test/test_byterange.sh test be contingent on remotetest.unidata.ucar.edu being accessible. Changes Left TO-DO: 1. fix provenance code, it is too HDF5 specific.
2020-06-29 08:02:47 +08:00
adapt_url_for_cache(&path);
2010-06-03 21:24:43 +08:00
}
This PR adds EXPERIMENTAL support for accessing data in the cloud using a variant of the Zarr protocol and storage format. This enhancement is generically referred to as "NCZarr". The data model supported by NCZarr is netcdf-4 minus the user-defined types and the String type. In this sense it is similar to the CDF-5 data model. More detailed information about enabling and using NCZarr is described in the document NUG/nczarr.md and in a [Unidata Developer's blog entry](https://www.unidata.ucar.edu/blogs/developer/en/entry/overview-of-zarr-support-in). WARNING: this code has had limited testing, so do use this version for production work. Also, performance improvements are ongoing. Note especially the following platform matrix of successful tests: Platform | Build System | S3 support ------------------------------------ Linux+gcc | Automake | yes Linux+gcc | CMake | yes Visual Studio | CMake | no Additionally, and as a consequence of the addition of NCZarr, major changes have been made to the Filter API. NOTE: NCZarr does not yet support filters, but these changes are enablers for that support in the future. Note that it is possible (probable?) that there will be some accidental reversions if the changes here did not correctly mimic the existing filter testing. In any case, previously filter ids and parameters were of type unsigned int. In order to support the more general zarr filter model, this was all converted to char*. The old HDF5-specific, unsigned int operations are still supported but they are wrappers around the new, char* based nc_filterx_XXX functions. This entailed at least the following changes: 1. Added the files libdispatch/dfilterx.c and include/ncfilter.h 2. Some filterx utilities have been moved to libdispatch/daux.c 3. A new entry, "filter_actions" was added to the NCDispatch table and the version bumped. 4. 
An overly complex set of structs was created to support funnelling all of the filterx operations thru a single dispatch "filter_actions" entry. 5. Move common code to from libhdf5 to libsrc4 so that it is accessible to nczarr. Changes directly related to Zarr: 1. Modified CMakeList.txt and configure.ac to support both C and C++ -- this is in support of S3 support via the awd-sdk libraries. 2. Define a size64_t type to support nczarr. 3. More reworking of libdispatch/dinfermodel.c to support zarr and to regularize the structure of the fragments section of a URL. Changes not directly related to Zarr: 1. Make client-side filter registration be conditional, with default off. 2. Hack include/nc4internal.h to make some flags added by Ed be unique: e.g. NC_CREAT, NC_INDEF, etc. 3. cleanup include/nchttp.h and libdispatch/dhttp.c. 4. Misc. changes to support compiling under Visual Studio including: * Better testing under windows for dirent.h and opendir and closedir. 5. Misc. changes to the oc2 code to support various libcurl CURLOPT flags and to centralize error reporting. 6. By default, suppress the vlen tests that have unfixed memory leaks; add option to enable them. 7. Make part of the nc_test/test_byterange.sh test be contingent on remotetest.unidata.ucar.edu being accessible. Changes Left TO-DO: 1. fix provenance code, it is too HDF5 specific.
2020-06-29 08:02:47 +08:00
#endif
} /* else fall thru and treat like a file path */
if(formatting_specs.xopt_inmemory) {
#if 0
size_t size = 0;
void* mem = NULL;
ncstat = fileopen(path,&mem,&size);
if(ncstat == NC_NOERR)
ncstat = nc_open_mem(path,NC_INMEMORY,size,mem,&ncid);
#else
ncstat = nc_open(path,NC_DISKLESS|NC_NOWRITE,&ncid);
#endif
} else /* just a file */
ncstat = nc_open(path, NC_NOWRITE, &ncid);
if (ncstat != NC_NOERR) goto fail;
2011-09-22 07:10:03 +08:00
NC_CHECK( nc_inq_format(ncid, &formatting_specs.nc_kind) );
NC_CHECK( nc_inq_format_extended(ncid,
&formatting_specs.nc_extended,
&formatting_specs.nc_mode) );
2010-06-03 21:24:43 +08:00
if (kind_out) {
printf ("%s\n", kind_string(formatting_specs.nc_kind));
} else if (kind_out_extended) {
printf ("%s\n", kind_string_extended(formatting_specs.nc_extended,formatting_specs.nc_mode));
2010-06-03 21:24:43 +08:00
} else {
/* Initialize list of types. */
init_types(ncid);
/* Check if any vars in -v don't exist */
if(missing_vars(ncid, formatting_specs.nlvars, formatting_specs.lvars)) {
snprintf(errmsg,sizeof(errmsg),"-v: non-existent variables");
goto fail;
}
2011-09-22 07:10:03 +08:00
if(formatting_specs.nlgrps > 0) {
if(formatting_specs.nc_kind != NC_FORMAT_NETCDF4)
goto fail;
2011-09-22 07:10:03 +08:00
/* Check if any grps in -g don't exist */
if(grp_matches(ncid, formatting_specs.nlgrps, formatting_specs.lgrps, formatting_specs.grpids) == 0)
goto fail;
2011-09-22 07:10:03 +08:00
}
2010-06-03 21:24:43 +08:00
if (xml_out) {
2011-09-27 00:25:10 +08:00
if(formatting_specs.nc_kind == NC_FORMAT_NETCDF4) {
snprintf(errmsg,sizeof(errmsg),"NcML output (-x) currently only permitted for netCDF classic model");
goto fail;
2011-09-27 00:25:10 +08:00
}
do_ncdumpx(ncid, path);
2010-06-03 21:24:43 +08:00
} else {
do_ncdump(ncid, path);
2010-06-03 21:24:43 +08:00
}
}
2011-09-22 07:10:03 +08:00
NC_CHECK( nc_close(ncid) );
2010-06-03 21:24:43 +08:00
}
This PR adds EXPERIMENTAL support for accessing data in the cloud using a variant of the Zarr protocol and storage format. This enhancement is generically referred to as "NCZarr". The data model supported by NCZarr is netcdf-4 minus the user-defined types and the String type. In this sense it is similar to the CDF-5 data model. More detailed information about enabling and using NCZarr is described in the document NUG/nczarr.md and in a [Unidata Developer's blog entry](https://www.unidata.ucar.edu/blogs/developer/en/entry/overview-of-zarr-support-in). WARNING: this code has had limited testing, so do use this version for production work. Also, performance improvements are ongoing. Note especially the following platform matrix of successful tests: Platform | Build System | S3 support ------------------------------------ Linux+gcc | Automake | yes Linux+gcc | CMake | yes Visual Studio | CMake | no Additionally, and as a consequence of the addition of NCZarr, major changes have been made to the Filter API. NOTE: NCZarr does not yet support filters, but these changes are enablers for that support in the future. Note that it is possible (probable?) that there will be some accidental reversions if the changes here did not correctly mimic the existing filter testing. In any case, previously filter ids and parameters were of type unsigned int. In order to support the more general zarr filter model, this was all converted to char*. The old HDF5-specific, unsigned int operations are still supported but they are wrappers around the new, char* based nc_filterx_XXX functions. This entailed at least the following changes: 1. Added the files libdispatch/dfilterx.c and include/ncfilter.h 2. Some filterx utilities have been moved to libdispatch/daux.c 3. A new entry, "filter_actions" was added to the NCDispatch table and the version bumped. 4. 
An overly complex set of structs was created to support funnelling all of the filterx operations thru a single dispatch "filter_actions" entry. 5. Move common code to from libhdf5 to libsrc4 so that it is accessible to nczarr. Changes directly related to Zarr: 1. Modified CMakeList.txt and configure.ac to support both C and C++ -- this is in support of S3 support via the awd-sdk libraries. 2. Define a size64_t type to support nczarr. 3. More reworking of libdispatch/dinfermodel.c to support zarr and to regularize the structure of the fragments section of a URL. Changes not directly related to Zarr: 1. Make client-side filter registration be conditional, with default off. 2. Hack include/nc4internal.h to make some flags added by Ed be unique: e.g. NC_CREAT, NC_INDEF, etc. 3. cleanup include/nchttp.h and libdispatch/dhttp.c. 4. Misc. changes to support compiling under Visual Studio including: * Better testing under windows for dirent.h and opendir and closedir. 5. Misc. changes to the oc2 code to support various libcurl CURLOPT flags and to centralize error reporting. 6. By default, suppress the vlen tests that have unfixed memory leaks; add option to enable them. 7. Make part of the nc_test/test_byterange.sh test be contingent on remotetest.unidata.ucar.edu being accessible. Changes Left TO-DO: 1. fix provenance code, it is too HDF5 specific.
2020-06-29 08:02:47 +08:00
nullfree(path) path = NULL;
Add filter support to NCZarr Filter support has three goals: 1. Use the existing HDF5 filter implementations, 2. Allow filter metadata to be stored in the NumCodecs metadata format used by Zarr, 3. Allow filters to be used even when HDF5 is disabled Detailed usage directions are define in docs/filters.md. For now, the existing filter API is left in place. So filters are defined using ''nc_def_var_filter'' using the HDF5 style where the id and parameters are unsigned integers. This is a big change since filters affect many parts of the code. In the following, the terms "compressor" and "filter" and "codec" are generally used synonomously. ### Filter-Related Changes: * In order to support dynamic loading of shared filter libraries, a new library was added in the libncpoco directory; it helps to isolate dynamic loading across multiple platforms. * Provide a json parsing library for use by plugins; this is created by merging libdispatch/ncjson.c with include/ncjson.h. * Add a new _Codecs attribute to allow clients to see what codecs are being used; let ncdump -s print it out. * Provide special headers to help support compilation of HDF5 filters when HDF5 is not enabled: netcdf_filter_hdf5_build.h and netcdf_filter_build.h. * Add a number of new test to test the new nczarr filters. * Let ncgen parse _Codecs attribute, although it is ignored. ### Plugin directory changes: * Add support for the Blosc compressor; this is essential because it is the most common compressor used in Zarr datasets. This also necessitated adding a CMake FindBlosc.cmake file * Add NCZarr support for the big-four filters provided by HDF5: shuffle, fletcher32, deflate (zlib), and szip * Add a Codec defaulter (see docs/filters.md) for the big four filters. * Make plugins work with windows by properly adding __declspec declaration. ### Misc. Non-Filter Changes * Replace most uses of USE_NETCDF4 (deprecated) with USE_HDF5. * Improve support for caching * More fixes for path conversion code * Fix misc. 
memory leaks * Add new utility -- ncdump/ncpathcvt -- that does more or less the same thing as cygpath. * Add a number of new test to test the non-filter fixes. * Update the parsers * Convert most instances of '#ifdef _MSC_VER' to '#ifdef _WIN32'
2021-09-03 07:04:26 +08:00
nc_finalize();
2010-06-03 21:24:43 +08:00
exit(EXIT_SUCCESS);
fail: /* ncstat failures */
path = (path?path:strdup("<unknown>"));
if(ncstat && strlen(errmsg) == 0)
snprintf(errmsg,sizeof(errmsg),"%s: %s", path, nc_strerror(ncstat));
This PR adds EXPERIMENTAL support for accessing data in the cloud using a variant of the Zarr protocol and storage format. This enhancement is generically referred to as "NCZarr". The data model supported by NCZarr is netcdf-4 minus the user-defined types and the String type. In this sense it is similar to the CDF-5 data model. More detailed information about enabling and using NCZarr is described in the document NUG/nczarr.md and in a [Unidata Developer's blog entry](https://www.unidata.ucar.edu/blogs/developer/en/entry/overview-of-zarr-support-in). WARNING: this code has had limited testing, so do not use this version for production work. Also, performance improvements are ongoing. Note especially the following platform matrix of successful tests: Platform | Build System | S3 support ------------------------------------ Linux+gcc | Automake | yes Linux+gcc | CMake | yes Visual Studio | CMake | no Additionally, and as a consequence of the addition of NCZarr, major changes have been made to the Filter API. NOTE: NCZarr does not yet support filters, but these changes are enablers for that support in the future. Note that it is possible (probable?) that there will be some accidental reversions if the changes here did not correctly mimic the existing filter testing. In any case, previously filter ids and parameters were of type unsigned int. In order to support the more general zarr filter model, this was all converted to char*. The old HDF5-specific, unsigned int operations are still supported but they are wrappers around the new, char* based nc_filterx_XXX functions. This entailed at least the following changes: 1. Added the files libdispatch/dfilterx.c and include/ncfilter.h 2. Some filterx utilities have been moved to libdispatch/daux.c 3. A new entry, "filter_actions" was added to the NCDispatch table and the version bumped. 4. 
An overly complex set of structs was created to support funnelling all of the filterx operations thru a single dispatch "filter_actions" entry. 5. Move common code to from libhdf5 to libsrc4 so that it is accessible to nczarr. Changes directly related to Zarr: 1. Modified CMakeList.txt and configure.ac to support both C and C++ -- this is in support of S3 support via the awd-sdk libraries. 2. Define a size64_t type to support nczarr. 3. More reworking of libdispatch/dinfermodel.c to support zarr and to regularize the structure of the fragments section of a URL. Changes not directly related to Zarr: 1. Make client-side filter registration be conditional, with default off. 2. Hack include/nc4internal.h to make some flags added by Ed be unique: e.g. NC_CREAT, NC_INDEF, etc. 3. cleanup include/nchttp.h and libdispatch/dhttp.c. 4. Misc. changes to support compiling under Visual Studio including: * Better testing under windows for dirent.h and opendir and closedir. 5. Misc. changes to the oc2 code to support various libcurl CURLOPT flags and to centralize error reporting. 6. By default, suppress the vlen tests that have unfixed memory leaks; add option to enable them. 7. Make part of the nc_test/test_byterange.sh test be contingent on remotetest.unidata.ucar.edu being accessible. Changes Left TO-DO: 1. fix provenance code, it is too HDF5 specific.
2020-06-29 08:02:47 +08:00
nullfree(path); path = NULL;
if(strlen(errmsg) > 0)
This PR adds EXPERIMENTAL support for accessing data in the cloud using a variant of the Zarr protocol and storage format. This enhancement is generically referred to as "NCZarr". The data model supported by NCZarr is netcdf-4 minus the user-defined types and the String type. In this sense it is similar to the CDF-5 data model. More detailed information about enabling and using NCZarr is described in the document NUG/nczarr.md and in a [Unidata Developer's blog entry](https://www.unidata.ucar.edu/blogs/developer/en/entry/overview-of-zarr-support-in). WARNING: this code has had limited testing, so do not use this version for production work. Also, performance improvements are ongoing. Note especially the following platform matrix of successful tests: Platform | Build System | S3 support ------------------------------------ Linux+gcc | Automake | yes Linux+gcc | CMake | yes Visual Studio | CMake | no Additionally, and as a consequence of the addition of NCZarr, major changes have been made to the Filter API. NOTE: NCZarr does not yet support filters, but these changes are enablers for that support in the future. Note that it is possible (probable?) that there will be some accidental reversions if the changes here did not correctly mimic the existing filter testing. In any case, previously filter ids and parameters were of type unsigned int. In order to support the more general zarr filter model, this was all converted to char*. The old HDF5-specific, unsigned int operations are still supported but they are wrappers around the new, char* based nc_filterx_XXX functions. This entailed at least the following changes: 1. Added the files libdispatch/dfilterx.c and include/ncfilter.h 2. Some filterx utilities have been moved to libdispatch/daux.c 3. A new entry, "filter_actions" was added to the NCDispatch table and the version bumped. 4. 
An overly complex set of structs was created to support funnelling all of the filterx operations thru a single dispatch "filter_actions" entry. 5. Move common code to from libhdf5 to libsrc4 so that it is accessible to nczarr. Changes directly related to Zarr: 1. Modified CMakeList.txt and configure.ac to support both C and C++ -- this is in support of S3 support via the awd-sdk libraries. 2. Define a size64_t type to support nczarr. 3. More reworking of libdispatch/dinfermodel.c to support zarr and to regularize the structure of the fragments section of a URL. Changes not directly related to Zarr: 1. Make client-side filter registration be conditional, with default off. 2. Hack include/nc4internal.h to make some flags added by Ed be unique: e.g. NC_CREAT, NC_INDEF, etc. 3. cleanup include/nchttp.h and libdispatch/dhttp.c. 4. Misc. changes to support compiling under Visual Studio including: * Better testing under windows for dirent.h and opendir and closedir. 5. Misc. changes to the oc2 code to support various libcurl CURLOPT flags and to centralize error reporting. 6. By default, suppress the vlen tests that have unfixed memory leaks; add option to enable them. 7. Make part of the nc_test/test_byterange.sh test be contingent on remotetest.unidata.ucar.edu being accessible. Changes Left TO-DO: 1. fix provenance code, it is too HDF5 specific.
2020-06-29 08:02:47 +08:00
error("%s", errmsg);
Add filter support to NCZarr Filter support has three goals: 1. Use the existing HDF5 filter implementations, 2. Allow filter metadata to be stored in the NumCodecs metadata format used by Zarr, 3. Allow filters to be used even when HDF5 is disabled Detailed usage directions are define in docs/filters.md. For now, the existing filter API is left in place. So filters are defined using ''nc_def_var_filter'' using the HDF5 style where the id and parameters are unsigned integers. This is a big change since filters affect many parts of the code. In the following, the terms "compressor" and "filter" and "codec" are generally used synonomously. ### Filter-Related Changes: * In order to support dynamic loading of shared filter libraries, a new library was added in the libncpoco directory; it helps to isolate dynamic loading across multiple platforms. * Provide a json parsing library for use by plugins; this is created by merging libdispatch/ncjson.c with include/ncjson.h. * Add a new _Codecs attribute to allow clients to see what codecs are being used; let ncdump -s print it out. * Provide special headers to help support compilation of HDF5 filters when HDF5 is not enabled: netcdf_filter_hdf5_build.h and netcdf_filter_build.h. * Add a number of new test to test the new nczarr filters. * Let ncgen parse _Codecs attribute, although it is ignored. ### Plugin directory changes: * Add support for the Blosc compressor; this is essential because it is the most common compressor used in Zarr datasets. This also necessitated adding a CMake FindBlosc.cmake file * Add NCZarr support for the big-four filters provided by HDF5: shuffle, fletcher32, deflate (zlib), and szip * Add a Codec defaulter (see docs/filters.md) for the big four filters. * Make plugins work with windows by properly adding __declspec declaration. ### Misc. Non-Filter Changes * Replace most uses of USE_NETCDF4 (deprecated) with USE_HDF5. * Improve support for caching * More fixes for path conversion code * Fix misc. 
memory leaks * Add new utility -- ncdump/ncpathcvt -- that does more or less the same thing as cygpath. * Add a number of new test to test the non-filter fixes. * Update the parsers * Convert most instances of '#ifdef _MSC_VER' to '#ifdef _WIN32'
2021-09-03 07:04:26 +08:00
nc_finalize();
exit(EXIT_FAILURE);
2010-06-03 21:24:43 +08:00
}
Regularize the scoping of dimensions This is a follow-on to pull request ````https://github.com/Unidata/netcdf-c/pull/1959````, which fixed up type scoping. The primary changes are to _nc\_inq\_dimid()_ and to ncdump. The _nc\_inq\_dimid()_ function is supposed to allow the name to be an FQN, but this apparently never got implemented. So it was modified to support FQNs. The ncdump program is supposed to output fully qualified dimension names in its generated CDL file under certain conditions. Suppose ncdump has a netcdf-4 file F with variable V, and V's parent group is G. For each dimension id D referenced by V, ncdump needs to determine whether to print its name as a simple name or as a fully qualified name (FQN). The algorithm is as follows: 1. Search up the tree of ancestor groups. 2. If one of those ancestor groups contains the dimid, then call it dimgrp. 3. If one of those ancestor groups contains a dim with the same name as the dimid, but with a different dimid, then record that as duplicate=true. 4. If dimgrp is defined and duplicate == false, then we do not need an fqn. 5. If dimgrp is defined and duplicate == true, then we do need an fqn to avoid incorrectly using the duplicate. 6. If dimgrp is undefined, then do a preorder breadth-first search of all the groups looking for the dimid. 7. If found, then use the fqn of the first such dimension location found. 8. If not found, then fail. Test case ncdump/test_scope.sh was modified to test the proper operation of ncdump and _nc\_inq\_dimid()_. Misc. Other Changes: * Fix nc_inq_ncid (NC4_inq_ncid actually) to return root group id if the name argument is NULL. * Modify _ncdump/printfqn_ to print out a dimid FQN; this supports verification that the resulting .nc files were properly created.
2021-06-01 05:51:12 +08:00
/* Helper function for searchgrouptreedim:
   search a single group for a dimension with a matching dimid.

   grp   - id of the group to search
   dimid - dimension id to look for

   Returns NC_NOERR if dimid is one of the dimension ids reported
   for grp, NC_EBADDIM if it is not (this includes the case where
   grp has no dimensions at all), NC_ENOMEM if the id buffer cannot
   be allocated, or the error code returned by nc_inq_dimids.
*/
static int
searchgroupdim(int grp, int dimid)
{
    int i,ret = NC_NOERR;
    int nids = 0;
    int* ids = NULL;

    /* First call only asks for the number of dimensions in grp
       (the trailing 0 is nc_inq_dimids' include_parents flag) */
    if ((ret = nc_inq_dimids(grp, &nids, NULL, 0)))
        goto done;
    if (nids > 0) {
        if (!(ids = (int *)malloc((size_t)nids * sizeof(int))))
            {ret = NC_ENOMEM; goto done;}
        /* Second call fills in the actual dimension ids */
        if ((ret = nc_inq_dimids(grp, &nids, ids, 0)))
            goto done;
        for(i = 0; i < nids; i++) {
            /* ret is still NC_NOERR here, which signals "found" */
            if(ids[i] == dimid) goto done;
        }
    }
    /* Reached when the group has no dimensions OR when none of its
       dimensions matched; both cases must report "not here" so the
       caller keeps searching other groups. */
    ret = NC_EBADDIM;
done:
    nullfree(ids);
    return ret;
}
/* Helper function for do_ncdump_rec:
   search a tree of groups for a matching dimid using a
   breadth-first queue, starting with group ncid.

   ncid      - id of the group at which the search starts
   dimid     - dimension id to look for
   parentidp - if non-NULL, receives the id of the group that
               immediately encloses the dimension; written only
               when the dimension is found

   Returns NC_NOERR if the dimension was found, NC_EBADDIM if no
   visited group contains it, NC_ENOMEM on allocation failure, or
   the error code of the failing searchgroupdim/nc_inq_grps call.
*/
static int
searchgrouptreedim(int ncid, int dimid, int* parentidp)
{
    int i,ret = NC_NOERR;
    int nids = 0;
    int* ids = NULL;
    NClist* queue = nclistnew();
    int gid;
    uintptr_t id;

    /* Without a queue the search cannot run; report the allocation
       failure rather than a misleading "dimension not found". */
    if(queue == NULL)
        return NC_ENOMEM;

    /* Group ids are carried in the list as pointer-sized integers */
    id = ncid;
    nclistpush(queue,(void*)id); /* prime the queue */
    while(nclistlength(queue) > 0) {
        /* Take from the front, append at the end => breadth first */
        id = (uintptr_t)nclistremove(queue,0);
        gid = (int)id;
        switch (ret = searchgroupdim(gid,dimid)) {
        case NC_NOERR: /* found it */
            if(parentidp) *parentidp = gid;
            goto done;
        case NC_EBADDIM: /* not in this group; keep looking */
            break;
        default: goto done;
        }
        /* Get the subgroups of gid; the first call only counts them */
        if((ret = nc_inq_grps(gid,&nids,NULL)))
            goto done;
        /* Skip leaf groups: malloc(0) is allowed to return NULL, which
           would otherwise be misreported as NC_ENOMEM. */
        if(nids > 0) {
            if (!(ids = (int *)malloc((size_t)nids * sizeof(int))))
                {ret = NC_ENOMEM; goto done;}
            if ((ret = nc_inq_grps(gid, &nids, ids)))
                goto done;
            /* push onto the end of the queue */
            for(i=0;i<nids;i++) {
                id = ids[i];
                nclistpush(queue,(void*)id);
            }
            free(ids); ids = NULL;
        }
    }
    /* Not found */
    ret = NC_EBADDIM;
done:
    nclistfree(queue);
    nullfree(ids);
    return ret;
}