netcdf-c/ncdump/utils.c

999 lines
25 KiB
C
Raw Normal View History

/*********************************************************************
* Copyright 2018, University Corporation for Atmospheric Research
* See netcdf/README file for copying and redistribution conditions.
* $Id$
*********************************************************************/
#include "config.h"
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <netcdf.h>
#include <assert.h>
#include <ctype.h>
#include "utils.h"
#include "nccomps.h"
#ifndef isascii
EXTERNL int isascii(int c);
#endif
/*
 * Report a fatal error and terminate.
 *
 * Writes "<progname>: <formatted message>" followed by a newline to
 * stderr, flushes stderr, and exits with EXIT_FAILURE.  The arguments
 * are interpreted printf-style.  This function does not return.
 */
void
error(const char *fmt, ...)
{
    va_list ap;

    (void) fprintf(stderr, "%s: ", progname);
    va_start(ap, fmt);
    (void) vfprintf(stderr, fmt, ap);
    va_end(ap);
    (void) fputc('\n', stderr);
    (void) fflush(stderr);	/* to ensure log files are current */
    exit(EXIT_FAILURE);
}
/*
 * Checked malloc(): returns a block of at least `size` bytes.
 * A zero-byte request is rounded up to one byte because malloc(0) is
 * not portable.  On allocation failure the problem is reported through
 * error().
 */
void *
emalloc(size_t size)
{
    size_t nbytes = size;
    void *mem;

    if (nbytes == 0)
        nbytes = 1;
    mem = (void *) malloc(nbytes);
    if (mem == NULL)
        error("out of memory\n");
    return mem;
}
/*
 * Checked calloc(): returns a zero-filled block of at least `size`
 * bytes.  A zero-byte request is rounded up to one byte because
 * calloc(0) is not portable.  On allocation failure the problem is
 * reported through error().
 */
void *
ecalloc(size_t size)
{
    size_t nbytes = size;
    void *mem;

    if (nbytes == 0)
        nbytes = 1;
    mem = (void *) calloc(1, nbytes);
    if (mem == NULL)
        error("out of memory\n");
    return mem;
}
/*
 * Checked realloc().
 *
 * A NULL `p0` is treated as a fresh allocation via emalloc().  For a
 * non-NULL `p0`, a zero `size` is reported through error() because
 * realloc(p, 0) is not portable, and an allocation failure is reported
 * the same way.
 */
void *
erealloc(void *p0, size_t size)
{
    void *resized;

    if (p0 != NULL) {
        if (size == 0)
            error("realloc with zero size");
        resized = (void *) realloc(p0, size);
        if (resized == NULL)
            error("out of memory\n");
        return resized;
    }
    return emalloc(size);
}
/*
 * Report a netCDF error and exit with status 1.
 *
 * Prints nc_strerror(err) and the source location (file, function,
 * line) to stderr, flushes stderr and stdout, then exits.  A NULL
 * `file` or `fcn` is printed as "?".
 */
void
check(int err, const char* file, const char* fcn, const int line)
{
    const char *where_file = (file != NULL) ? file : "?";
    const char *where_fcn = (fcn != NULL) ? fcn : "?";

    fprintf(stderr, "%s\n", nc_strerror(err));
    fprintf(stderr, "Location: file %s; fcn %s line %d\n", where_file, where_fcn, line);
    fflush(stderr);
    fflush(stdout);
    exit(1);
}
/*
 * Returns malloced name with chars special to CDL escaped.
 * Caller should free result when done with it.
 *
 * Escaping rules:
 *  - a leading decimal digit is allowed but is prefixed with '\';
 *  - ASCII control characters are rendered as the four characters
 *    "\%xx", where xx is the two-digit hex code;
 *  - ASCII punctuation that is special to CDL is prefixed with '\';
 *  - '/' and all other ASCII characters are copied unchanged;
 *  - non-ASCII bytes are assumed to be UTF-8 and copied unchanged.
 *
 * A name that begins with a space or control character is reported
 * through error().
 */
char*
escaped_name(const char* cp) {
    char *ret;		/* string returned */
    char *sp;		/* write cursor into ret */

    assert(cp != NULL);

    /* An explicit range test is used instead of isspace()/iscntrl():
     * on some machines those classify UTF-8 bytes such as \343 as
     * control characters. */
    if((*cp >= 0x01 && *cp <= 0x20) || (*cp == 0x7f))
    {
        error("name begins with space or control-character: %c",*cp);
    }

    /* Worst case is four output chars per input char.  The leading
     * digit case emits only two chars for that position, so it fits
     * within the same bound. */
    ret = emalloc(4*strlen(cp) + 1);
    sp = ret;
    *sp = 0;			/* empty name OK */

    /* Special case: leading number allowed, but we must escape it for CDL */
    if(*cp >= '0' && *cp <= '9')
    {
        *sp++ = '\\';
    }

    for (; *cp; cp++) {
        /* Go through unsigned char so <ctype.h> and %x never see a
         * negative value when plain char is signed. */
        const unsigned char uc = (unsigned char)*cp;

        if (!isascii((int)uc)) {	/* not ascii, assume just UTF-8 byte */
            *sp++ = *cp;
            continue;
        }
        if (iscntrl((int)uc)) {	/* render control chars as two hex digits, \%xx */
            snprintf(sp, 4+1, "\\%%%.2x", (unsigned int)uc);
            sp += 4;
            continue;
        }
        switch (*cp) {
        case ' ':
        case '!':
        case '"':
        case '#':
        case '$':
        case '&':
        case '\'':
        case '(':
        case ')':
        case '*':
        case ',':
        case ':':
        case ';':
        case '<':
        case '=':
        case '>':
        case '?':
        case '[':
        case ']':
        case '\\':
        case '^':
        case '`':
        case '{':
        case '|':
        case '}':
        case '~':
            *sp++ = '\\';
            *sp++ = *cp;
            break;
        default:		/* includes '/' */
            *sp++ = *cp;
            break;
        }
    }
    *sp = 0;
    return ret;
}
/*
 * Write `name` to stdout with CDL-special characters escaped
 * (see escaped_name()).
 */
void
print_name(const char* name) {
    char *escaped = escaped_name(name);

    (void) fputs(escaped, stdout);
    free(escaped);
}
/* Add filter support to NCZarr Filter support has three goals: 1. Use the existing HDF5 filter implementations, 2. Allow filter metadata to be stored in the NumCodecs metadata format used by Zarr, 3. Allow filters to be used even when HDF5 is disabled Detailed usage directions are defined in docs/filters.md. For now, the existing filter API is left in place. So filters are defined using ''nc_def_var_filter'' using the HDF5 style where the id and parameters are unsigned integers. This is a big change since filters affect many parts of the code. In the following, the terms "compressor" and "filter" and "codec" are generally used synonymously. ### Filter-Related Changes: * In order to support dynamic loading of shared filter libraries, a new library was added in the libncpoco directory; it helps to isolate dynamic loading across multiple platforms. * Provide a json parsing library for use by plugins; this is created by merging libdispatch/ncjson.c with include/ncjson.h. * Add a new _Codecs attribute to allow clients to see what codecs are being used; let ncdump -s print it out. * Provide special headers to help support compilation of HDF5 filters when HDF5 is not enabled: netcdf_filter_hdf5_build.h and netcdf_filter_build.h. * Add a number of new tests to test the new nczarr filters. * Let ncgen parse _Codecs attribute, although it is ignored. ### Plugin directory changes: * Add support for the Blosc compressor; this is essential because it is the most common compressor used in Zarr datasets. This also necessitated adding a CMake FindBlosc.cmake file * Add NCZarr support for the big-four filters provided by HDF5: shuffle, fletcher32, deflate (zlib), and szip * Add a Codec defaulter (see docs/filters.md) for the big four filters. * Make plugins work with windows by properly adding __declspec declaration. ### Misc. Non-Filter Changes * Replace most uses of USE_NETCDF4 (deprecated) with USE_HDF5. * Improve support for caching * More fixes for path conversion code * Fix misc.
   memory leaks * Add new utility -- ncdump/ncpathcvt -- that does more or less the same thing as cygpath. * Add a number of new tests to test the non-filter fixes. * Update the parsers * Convert most instances of '#ifdef _MSC_VER' to '#ifdef _WIN32'
   2021-09-03 07:04:26 +08:00 */
/*
 * Returns a malloced copy of the string with selected chars escaped:
 * ASCII control characters become the four characters "\%xx" (two hex
 * digits) and '"' becomes '\"'.  All other ASCII characters, and all
 * non-ASCII bytes (assumed to be UTF-8), are copied unchanged.
 * Caller should free result when done with it.
 */
char*
escaped_string(const char* cp) {
    const char *src;
    char *result;	/* string returned */
    char *out;		/* write cursor into result */

    assert(cp != NULL);

    result = emalloc(4*strlen(cp) + 1);	/* max if every char escaped */
    out = result;
    *out = 0;			/* empty string OK */

    for (src = cp; *src; src++) {
        /* isascii() is tested first: on some machines UTF-8 bytes such
         * as \343 are considered control characters. */
        if (!isascii((int)*src)) {
            *out++ = *src;	/* not ascii, assume just UTF-8 byte */
            continue;
        }
        if (iscntrl((int)*src)) {
            /* render control chars as two hex digits, \%xx */
            snprintf(out, 4+1,"\\%%%.2x", *src);
            out += 4;
        } else {
            if (*src == '"')
                *out++ = '\\';
            *out++ = *src;
        }
    }
    *out = 0;
    return result;
}
/*
 * Convert a group path name to a group id.
 *
 * With netCDF-4 support: a name without '/' is looked up with
 * nc_inq_grp_ncid() relative to `ncid`.  Otherwise the path is walked
 * one segment at a time with nc_inq_ncid(); a leading '/' makes the
 * walk start at the root group (see getrootid()), and the final group
 * id is stored in *grpidp when grpidp is not NULL.
 * Without netCDF-4 support, *grpidp is simply set to `ncid`.
 *
 * Returns NC_NOERR, NC_ENOMEM if the name cannot be copied, or the
 * error from the failing lookup.
 */
int
nc_inq_grpid2(int ncid, const char *grpname0, int *grpidp)
{
    int status = NC_NOERR;
    char *pathcopy = strdup(grpname0);	/* modifiable copy; segments are cut in place */

    if (pathcopy == NULL)
        return NC_ENOMEM;

#ifdef USE_NETCDF4
    if (strchr(pathcopy, '/') == NULL) {
        /* No '/' in name, so return the nc_inq_grp_ncid() result */
        status = nc_inq_grp_ncid(ncid, pathcopy, grpidp);
    } else {
        char *segment = pathcopy;
        int current = ncid;

        if (*segment == '/') {
            current = getrootid(current);	/* absolute path: start at root */
            segment++;				/* skip leading '/' */
        }
        /* Walk down looking for each group in path in turn */
        while (status == NC_NOERR && *segment != '\0') {
            char *slash = strchr(segment, '/');
            char *rest;
            int child;

            if (slash != NULL) {
                *slash = '\0';
                rest = slash + 1;
            } else {
                rest = segment + strlen(segment);	/* point to trailing nul */
            }
            /* Lookup this path segment wrt to current group */
            status = nc_inq_ncid(current, segment, &child);
            if (status == NC_NOERR) {
                current = child;
                segment = rest;
            }
        }
        if (status == NC_NOERR && grpidp != NULL)
            *grpidp = current;
    }
#else /* !USE_NETCDF4 */
    /* Just return root */
    if (grpidp != NULL)
        *grpidp = ncid;
#endif /* USE_NETCDF4 */

    free(pathcopy);
    return status;
}
/*
 * Convert a full path name of a variable to its varid + grpid.
 *
 * The path is split at its rightmost '/': the part before it is the
 * group prefix, resolved with nc_inq_grpid2(), and the part after it
 * is the variable name, looked up with nc_inq_varid() in that group.
 * A path without any '/' uses the prefix "/".  A path whose only '/'
 * is the leading one (e.g. "/var") would otherwise produce an empty
 * prefix, so it is mapped to "/" as well.
 *
 * On success the ids are stored through varidp / grpidp (each may be
 * NULL).  Returns NC_NOERR, NC_ENOMEM if the path cannot be copied, or
 * the error from the failing lookup.
 */
int
nc_inq_varid2(int ncid, const char *path0, int* varidp, int* grpidp)
{
    int ret = NC_NOERR;
    int grpid, varid;
    char *v, *g;
    const char *prefix;
    char* path = NULL;

    path = strdup(path0);
    if(path == NULL) {ret = NC_ENOMEM; goto done;}

    /* Find the rightmost '/' and tag the start of the path */
    g = strrchr(path,'/');
    if(g == NULL) {
        v = path;
        prefix = "/";		/* literal, not part of `path` */
    } else {
        *g++ = '\0';		/* separate out the prefix */
        v = g;
        /* An empty prefix means the variable sits directly under a
         * leading '/'; use "/" instead of an empty group name. */
        prefix = (path[0] == '\0') ? "/" : path;
    }

    /* convert the group prefix to a group id */
    if((ret=nc_inq_grpid2(ncid,prefix,&grpid)))
        goto done;
    /* Lookup the var in the terminal group */
    if((ret=nc_inq_varid(grpid,v,&varid)))
        goto done;

    if(grpidp)
        *grpidp = grpid;
    if(varidp)
        *varidp = varid;
done:
    free(path);
    return ret;
}
/* Missing functionality that should be in nc_inq_dimid(), to get
 * dimid from a full dimension path name that may include group
 * names.
 *
 * With netCDF-4 support, a name containing '/' is split at its last
 * '/': the part before it is resolved to a group with
 * nc_inq_grp_full_ncid(), and the part after it (the leaf dimension
 * name) is looked up in that group with nc_inq_dimid().
 * In every other case the name is handed to nc_inq_dimid() unchanged,
 * so the caller always gets either a dimid or an error code.
 *
 * Returns NC_NOERR on success, otherwise the error from the failing
 * lookup. */
int
nc_inq_dimid2(int ncid, const char *dimname, int *dimidp) {
#ifdef USE_NETCDF4
    const char *sp = strrchr(dimname, '/');

    if(sp != NULL) {	/* Parse group name out and get dimid using that */
        size_t grp_namelen = (size_t)(sp - dimname);
        char *grpname = emalloc(grp_namelen+1);
        int grpid;
        int ret;

        memcpy(grpname, dimname, grp_namelen);
        grpname[grp_namelen] = '\0';
        ret = nc_inq_grp_full_ncid(ncid, grpname, &grpid);
        if(ret == NC_NOERR) {
            /* look up the leaf name, not the whole path */
            ret = nc_inq_dimid(grpid, sp + 1, dimidp);
        }
        free(grpname);
        return ret;
    }
#endif /* USE_NETCDF4 */
    /* No '/' in dimname (or no group support): plain lookup */
    return nc_inq_dimid(ncid, dimname, dimidp);
}
/*
 * Return 1 if varid identifies a record variable, else return 0.
 *
 * With netCDF-4 support a variable counts as a record variable when
 * any of its dimensions is one of the unlimited dimensions reported by
 * nc_inq_unlimdims().  Otherwise only its first dimension is compared
 * with the single unlimited dimension from nc_inq_unlimdim().
 * A variable with no dimensions is never a record variable.
 */
int
isrecvar(int ncid, int varid)
{
    int ndims;
    int found = 0;
    int *vardims;

    NC_CHECK( nc_inq_varndims(ncid, varid, &ndims) );
    if (ndims <= 0)
        return 0;

    vardims = (int *) emalloc((ndims + 1) * sizeof(int));
    NC_CHECK( nc_inq_vardimid(ncid, varid, vardims) );
#ifdef USE_NETCDF4
    {
        int nunlim;
        int *unlimids;
        int i, j;

        NC_CHECK( nc_inq_unlimdims(ncid, &nunlim, NULL) );
        unlimids = (int *) emalloc((nunlim + 1) * sizeof(int));
        NC_CHECK( nc_inq_unlimdims(ncid, NULL, unlimids) );
        for (i = 0; i < ndims && !found; i++) {
            for (j = 0; j < nunlim && !found; j++) {
                if (vardims[i] == unlimids[j])
                    found = 1;
            }
        }
        free(vardims);
        free(unlimids);
    }
#else
    {
        int recdimid;

        NC_CHECK( nc_inq_unlimdim(ncid, &recdimid) );
        found = (vardims[0] == recdimid);
        free(vardims);
    }
#endif /* USE_NETCDF4 */
    return found;
}
/* Allocate one id-list node with emalloc(); its fields are left unset. */
static idnode_t*
newidnode(void) {
    return (idnode_t*) emalloc(sizeof(idnode_t));
}
/*
 * Get a new, empty id list.  The returned node is a list head: its id
 * is -1 (a bad id) and it has no successor.
 */
idnode_t*
newidlist(void) {
    idnode_t *head = newidnode();

    head->id = -1;	/* bad id */
    head->next = 0;
    return head;
}
/* Insert `varid` into the list, directly after the head node `vlist`. */
void
idadd(idnode_t* vlist, int varid) {
    idnode_t *node = newidnode();

    node->id = varid;
    node->next = vlist->next;
    vlist->next = node;
}
/*
 * Return true if id is a member of the list that idlist points to.
 * The head node itself is skipped; only its successors are examined.
 */
bool_t
idmember(const idnode_t* idlist, int id)
{
    const idnode_t *cur = idlist->next;

    while (cur != NULL) {
        if (cur->id == id)
            return true;
        cur = cur->next;
    }
    return false;
}
/*
 * Release an id list: frees the head node and every node after it.
 * A NULL list is accepted.
 */
void
freeidlist(idnode_t *idlist)
{
    idnode_t *cur;
    idnode_t *following;

    for (cur = idlist; cur != NULL; cur = following) {
        following = cur->next;	/* fetch before the node is freed */
        free(cur);
    }
}
/*
 * Return true if the group identified by grpid is wanted.
 * nlgrps is the number of groups in the -g list; when it is zero
 * (no -g given) every group is wanted.  Otherwise grpid must be a
 * member of grpids.
 */
bool_t
group_wanted(int grpid, int nlgrps, const idnode_t* grpids)
{
    if (nlgrps != 0)
        return idmember(grpids, grpid);	/* -g specified: look for a match */
    return true;
}
/* Count how many groups named lgrps[igrp] exist in the netCDF file or
 * group with id ncid, and record the id of every match in grpids.
 *
 * - An empty name or "/" designates ncid itself: count is 1.
 * - A name beginning with "/" is an absolute group name, so the count
 *   is 0 or 1; any lookup failure other than NC_ENOGRP is reported
 *   through error().
 * - Any other name is relative: ncid and all of its descendant groups
 *   are searched recursively and the total number of matches is
 *   returned.
 *
 * Without netCDF-4 support only the first case can match.
 *
 * TODO: updating grpids is a side effect that belongs in its own
 * function.
 */
static size_t
nc_inq_grpname_count(int ncid, int igrp, char **lgrps, idnode_t *grpids) {
    char *wanted = lgrps[igrp];

    /* permit empty string to also designate root group */
    if (wanted[0] == '\0' || NCSTREQ(wanted,"/")) {
        idadd(grpids, ncid);
        return 1;
    }
#ifdef USE_NETCDF4
    {
        size_t nmatch = 0;
        int found;
        int rc;
        int nsub;

        /* Handle absolute group names */
        if (wanted[0] == '/') {
            rc = nc_inq_grp_full_ncid(ncid, wanted, &found);
            if (rc == NC_NOERR) {
                idadd(grpids, found);
                nmatch = 1;
            } else if (rc != NC_ENOGRP) {
                error("when looking up group %s: %s ", wanted, nc_strerror(rc));
            }
            return nmatch;
        }

        /* look in this group */
        rc = nc_inq_grp_ncid(ncid, wanted, &found);
        if (rc == NC_NOERR) {
            nmatch++;
            idadd(grpids, found);
        }

        /* if this group has subgroups, call recursively on each of them */
        NC_CHECK( nc_inq_grps(ncid, &nsub, NULL) );
        if (nsub > 0) {
            int *subids = emalloc(nsub * sizeof(int));
            int i;

            NC_CHECK( nc_inq_grps(ncid, NULL, subids) );
            for (i = 0; i < nsub; i++)
                nmatch += nc_inq_grpname_count(subids[i], igrp, lgrps, grpids);
            free(subids);
        }
        return nmatch;
    }
#else
    return 0;
#endif /* USE_NETCDF4 */
}
/* Check that every group name given with "-g grp1,...,grpn" matches at
 * least one group.  Returns the total number of matching groups; a
 * name with no match is reported through error().  Matching group ids
 * are accumulated in grpids. */
int
grp_matches(int ncid, int nlgrps, char** lgrps, idnode_t *grpids) {
    size_t matched = 0;
    int i = 0;

    while (i < nlgrps) {
        size_t n = nc_inq_grpname_count(ncid, i, lgrps, grpids);

        if (n == 0) {
            error("%s: No such group", lgrps[i]);
            return 0;
        }
        matched += n;
        i++;
    }
    return matched;
}
/* Returns 1 if string s1 ends with string s2, 0 otherwise.
 * An empty s2 is a suffix of every string. */
int
strendswith(const char *s1, const char *s2) {
    const size_t len = strlen(s1);
    const size_t suffixlen = strlen(s2);
    const char *tail;

    if (suffixlen > len)
        return 0;
    tail = s1 + (len - suffixlen);	/* last suffixlen chars of s1 */
    return strcmp(tail, s2) == 0;
}
/* Get varid of variable with name using nested group syntax
 * "gp1/gp2/var" or "/gp1/gp2/var".
 *
 * With netCDF-4 support:
 *  - an empty name yields NC_ENOTVAR;
 *  - a name without "/" is looked up directly in grpid;
 *  - otherwise the name is split at its last "/" into a group part and
 *    a relative name.  For a name starting with "/" the full name of
 *    grpid must equal the group part exactly; for any other name the
 *    full name of grpid must end with the group part.  When that holds
 *    the relative name is looked up in grpid with nc_inq_varid(), else
 *    NC_ENOTVAR is returned.
 * Without netCDF-4 support the name is passed to nc_inq_varid() as is.
 */
int
nc_inq_gvarid(int grpid, const char *varname, int *varidp) {
#ifdef USE_NETCDF4
    char *grouppart;	/* copy of varname, cut at the last delimiter */
    char *leaf;		/* variable name after the last delimiter */

    if (varname[0] == '\0')
        return NC_ENOTVAR;
    grouppart = strdup(varname);
    if (grouppart == NULL)
        return NC_ENOMEM;

    leaf = strrchr(grouppart, NC_GRP_DELIM);
    if (leaf == NULL) {
        free(grouppart);	/* plain name: fall through to direct lookup */
    } else {
        size_t fulllen;		/* length of full group name for grpid */
        char *fullname;
        int rc;

        *leaf = '\0';		/* split copy into group part and leaf */
        leaf++;

        rc = nc_inq_grpname_full(grpid, &fulllen, NULL);
        if (rc != NC_NOERR) {
            free(grouppart);
            return rc;
        }
        fullname = (char *)emalloc(fulllen + 1);
        rc = nc_inq_grpname_full(grpid, &fulllen, fullname);
        if (rc == NC_NOERR) {
            int ingroup;

            if (varname[0] == NC_GRP_DELIM)
                ingroup = (strcmp(fullname, grouppart) == 0);
            else
                ingroup = strendswith(fullname, grouppart);
            rc = ingroup ? nc_inq_varid(grpid, leaf, varidp) : NC_ENOTVAR;
        }
        free(grouppart);
        free(fullname);
        return rc;
    }
#endif /* USE_NETCDF4 */
    return nc_inq_varid(grpid, varname, varidp);
}
/* Count the variables named varname in the group ncid and, with
   netCDF-4 support, in all of its descendant groups.  Returns 0 when
   there is no match.  The name may be absolute, such as "/foo" or
   "/GRP1/GRP1A/foo", in which case only the group given by the path
   from the root can match, or relative, such as "foo" or
   "GRPA/GRPB/foo", in which case every group is examined for a
   variable with that relative name.  Matching in a single group is
   done by nc_inq_gvarid(). */
size_t
nc_inq_varname_count(int ncid, char *varname) {
    size_t nfound;
    int varid;

    /* look in this group */
    nfound = (nc_inq_gvarid(ncid, varname, &varid) == NC_NOERR) ? 1 : 0;

#ifdef USE_NETCDF4
    {
        int nsub;
        int *subids;
        int i;

        /* if this group has subgroups, call recursively on each of them */
        NC_CHECK( nc_inq_grps(ncid, &nsub, NULL) );
        /* one spare slot keeps the request non-empty when nsub is 0 */
        subids = emalloc((nsub + 1) * sizeof(int));
        NC_CHECK( nc_inq_grps(ncid, NULL, subids) );
        for (i = 0; i < nsub; i++)
            nfound += nc_inq_varname_count(subids[i], varname);
        free(subids);
    }
#endif /* USE_NETCDF4 */
    return nfound;
}
/* Check if any variable names specified with "-v var1,...,varn" are
 * missing.  Returns 0 if every name matches at least one variable; a
 * name with no match is reported through error(). */
int
missing_vars(int ncid, int nlvars, char **lvars) {
    int i = 0;

    while (i < nlvars) {
        char *name = lvars[i++];

        if (nc_inq_varname_count(ncid, name) != 0)
            continue;
        error("%s: No such variable", name);
    }
    return 0;
}
/*
 * Split a comma-delimited list of variable names into a malloced array
 * of malloced strings.
 *
 * optarg  - the list; it is modified in place by strtok()
 * nlvarsp - receives the number of names actually stored
 * lvarsp  - receives the array of names
 *
 * strtok() skips empty fields, so a list such as "a,,b", "a," or ""
 * produces fewer names than it has commas plus one.  The array is
 * sized from the comma count (an upper bound), while the reported
 * count is the number of names stored, so every slot in
 * [0, *nlvarsp) is a valid string.
 */
void
make_lvars(char *optarg, int *nlvarsp, char ***lvarsp)
{
    char *cp;
    char **names;
    int maxvars = 1;	/* upper bound: number of commas + 1 */
    int nvars = 0;	/* names actually stored */

    for (cp = optarg; *cp != '\0'; cp++)
        if (*cp == ',')
            maxvars++;

    names = (char **) emalloc(maxvars * sizeof(char*));

    /* copy variable names into list */
    for (cp = strtok(optarg, ","); cp != NULL; cp = strtok((char *) NULL, ",")) {
        char *copy = strdup(cp);

        if (copy == NULL)
            error("out of memory\n");
        names[nvars++] = copy;
    }
    *lvarsp = names;
    *nlvarsp = nvars;
}
/*
 * Split a comma-delimited list of group names into a malloced array of
 * malloced strings, and create the (initially empty) list of group ids.
 *
 * optarg  - the list; it is modified in place by strtok()
 * nlgrps  - receives the number of names actually stored
 * lgrpsp  - receives the array of names
 * grpidsp - receives a new empty id list from newidlist(), to be
 *           filled in after the input file is opened
 *
 * strtok() skips empty fields, so a list such as "a,,b", "a," or ""
 * produces fewer names than it has commas plus one.  The array is
 * sized from the comma count (an upper bound), while the reported
 * count is the number of names stored, so every slot in
 * [0, *nlgrps) is a valid string.
 */
void
make_lgrps(char *optarg, int *nlgrps, char ***lgrpsp, idnode_t **grpidsp)
{
    char *cp;
    char **names;
    int maxgrps = 1;	/* upper bound: number of commas + 1 */
    int ngrps = 0;	/* names actually stored */

    for (cp = optarg; *cp != '\0'; cp++)
        if (*cp == ',')
            maxgrps++;

    names = (char **) emalloc(maxgrps * sizeof(char*));

    /* copy group names into list */
    for (cp = strtok(optarg, ","); cp != NULL; cp = strtok((char *) NULL, ",")) {
        char *copy = strdup(cp);

        if (copy == NULL)
            error("out of memory\n");
        names[ngrps++] = copy;
    }
    *lgrpsp = names;
    *nlgrps = ngrps;

    /* make empty list of grpids, to be filled in after input file opened */
    *grpidsp = newidlist();
}
/* Allocate and return a new empty stack of grpids (no nodes, count 0). */
static ncgiter_t *
gs_init() {
    ncgiter_t *stack = emalloc(sizeof(ncgiter_t));

    stack->top = NULL;
    stack->ngrps = 0;
    return stack;
}
/* Free a stack: every node still on it, then the stack itself. */
static void
gs_free(ncgiter_t *s) {
    grpnode_t *node = s->top;

    while (node != NULL) {
        grpnode_t *below = node->next;	/* fetch before the node is freed */

        free(node);
        node = below;
    }
    free(s);
}
/* Return nonzero if the stack holds no grpids, based on its count. */
static int
gs_empty(ncgiter_t *s)
{
    return (s->ngrps == 0) ? 1 : 0;
}
/* Push a grpid on the stack: the new node becomes the top. */
static void
gs_push(ncgiter_t *s, int grpid)
{
    grpnode_t *fresh = emalloc(sizeof(grpnode_t));

    fresh->grpid = grpid;
    if (gs_empty(s))
        fresh->next = NULL;
    else
        fresh->next = s->top;
    s->top = fresh;
    s->ngrps++;
}
/* Pop the top grpid off the stack and return it.
 * Returns -1 on underflow (stack is empty). */
static int
gs_pop(ncgiter_t *s)
{
    grpnode_t *popped;
    int grpid;

    if (gs_empty(s))
        return -1;		/* underflow, stack is empty */

    popped = s->top;
    grpid = popped->grpid;
    s->top = popped->next;
    /* TODO: first call to free gets seg fault with libumem */
    free(popped);
    s->ngrps--;
    return grpid;
}
#ifdef UNUSED
/* Return the top grpid without popping it, or -1 if the stack is
 * empty.  Defined for completeness but not used (here). */
static int
gs_top(ncgiter_t *s)
{
    if (gs_empty(s))
        return -1;		/* underflow, stack is empty */
    return s->top->grpid;
}
#endif
/* Like netCDF-4 function nc_inq_grps(), but can be called from
 * netCDF-3 only code as well.  The id is first validated with
 * nc_inq(); without netCDF-4 support the number of subgroups is then
 * reported as 0 and grpids is left untouched.  Failures are handled by
 * NC_CHECK, so the return value is always NC_NOERR. */
static int
nc_inq_grps2(int ncid, int *numgrps, int *grpids)
{
    /* just check if ncid is valid id of open netCDF file */
    NC_CHECK(nc_inq(ncid, NULL, NULL, NULL, NULL));

#ifdef USE_NETCDF4
    NC_CHECK(nc_inq_grps(ncid, numgrps, grpids));
#else
    *numgrps = 0;
#endif
    return NC_NOERR;
}
/* Initialize a group iterator for a start group and all its descendant
 * groups.  Returns the status of nc_inq() on grpid.  Unless that status
 * is NC_EBADGRPID or NC_EBADID, *iterp is set to a new iterator whose
 * stack holds just grpid. */
int
nc_get_giter(int grpid,	       /* start group id */
	     ncgiter_t **iterp /* returned opaque iteration state */
    )
{
    /* check if grpid is valid */
    int stat = nc_inq(grpid, NULL, NULL, NULL, NULL);

    if (stat == NC_EBADGRPID || stat == NC_EBADID)
        return stat;

    *iterp = gs_init();
    gs_push(*iterp, grpid);
    return stat;
}
/*
 * Get group id of next group.  On first call gets start group id,
 * subsequently returns other subgroup ids in preorder.  Stores zero
 * in *grpidp when no more groups are left.
 *
 * The popped group's subgroups are pushed in reverse order so that the
 * first subgroup is the next one popped.
 */
int
nc_next_giter(ncgiter_t *iterp, int *grpidp) {
    int nkids;
    int *kids;
    int k;

    if (gs_empty(iterp)) {
        *grpidp = 0;		/* not a group, signals iterator is done */
        return NC_NOERR;
    }

    *grpidp = gs_pop(iterp);
    NC_CHECK(nc_inq_grps2(*grpidp, &nkids, NULL));
    if (nkids <= 0)
        return NC_NOERR;

    kids = (int *)emalloc(sizeof(int) * nkids);
    NC_CHECK(nc_inq_grps2(*grpidp, &nkids, kids));
    k = nkids;
    while (k-- > 0)		/* push ids on stack in reverse order */
        gs_push(iterp, kids[k]);
    free(kids);
    return NC_NOERR;
}
/*
 * Release a group iterator obtained from nc_get_giter(), including any
 * group ids still pending on its stack.
 */
void
nc_free_giter(ncgiter_t *iterp)
{
    gs_free(iterp);
}
/*
 * Get the total number of groups at and below rootid (the start group
 * itself plus all descendant groups, recursively) and their ids, in
 * preorder, starting with rootid.
 *
 * If grpids or numgrps is NULL, it will be ignored.  So typical use
 * is to call with grpids NULL to get numgrps, allocate enough space
 * for the group ids, then call again to get them.
 */
int
nc_inq_grps_full(int rootid, int *numgrps, int *grpids)
{
    ncgiter_t *iter;	/* pointer to group iterator */
    int gid;
    size_t n = 0;

    NC_CHECK(nc_get_giter(rootid, &iter));
    for (;;) {
        NC_CHECK(nc_next_giter(iter, &gid));
        if (gid == 0)		/* iterator exhausted */
            break;
        if (grpids != NULL)
            grpids[n] = gid;
        n++;
    }
    if (numgrps != NULL)
        *numgrps = n;
    nc_free_giter(iter);
    return NC_NOERR;
}
/*
 * Return the id of the root group above grpid.
 * With netCDF-4 support this follows nc_inq_grp_parent() upward until
 * it reports an error; the last id reached is returned.  Without it,
 * grpid is returned unchanged.
 */
int
getrootid(int grpid)
{
    int root = grpid;
#ifdef USE_NETCDF4
    int up = root;

    /* climb while a parent can still be found */
    while (nc_inq_grp_parent(root, &up) == NC_NOERR)
        root = up;
#endif
    return root;
}
#if 0
/*
 * Resolve a fully qualified name of the form "/grp1/grp2/var" to a
 * group id + variable id pair (currently compiled out).
 *
 * ncid - id that group lookups start from
 * fqn0 - the name; must be non-NULL and begin with '/'.  Inside a
 *        segment a backslash escapes the next character, so "\/" is a
 *        literal '/' in a name.
 * idp  - if not NULL, receives the ids on success only
 *
 * Each segment before the last is resolved with nc_inq_grp_ncid(); the
 * last segment is resolved with nc_inq_varid().
 * Returns NC_NOERR, NC_EBADNAME for a NULL or non-absolute name,
 * NC_ENOMEM if the name cannot be copied, or the error from the
 * failing lookup.
 */
static int
parseFQN(int ncid, const char* fqn0, VarID* idp)
{
    int stat = NC_NOERR;
    char* fqn = NULL;	/* NULL so the cleanup at `done` is always safe */
    VarID vid;
    char* p;
    char* q;
    char* segment;

    vid.grpid = ncid;
    if(fqn0 == NULL || fqn0[0] != '/')
        {stat = NC_EBADNAME; goto done;}
    fqn = strdup(fqn0+1); /* skip leading '/'*/
    if(fqn == NULL)
        {stat = NC_ENOMEM; goto done;}
    p = fqn;
    for(;;) {
        int newgrp;
        int atend;

        segment = p;
        q = p;		/* write cursor; trails p once escapes are removed */
        while(*p != '\0' && *p != '/') {
            /* drop the backslash, but never step past the terminator */
            if(*p == '\\' && p[1] != '\0') p++;
            *q++ = *p++;
        }
        atend = (*p == '\0');	/* read before *q is written: q may equal p */
        *q = '\0';		/* terminate the unescaped segment */
        if(atend) break;
        p++;			/* step over the '/' separator */
        if((stat=nc_inq_grp_ncid(vid.grpid,segment,&newgrp))) goto done;
        vid.grpid = newgrp;
    }
    /* Segment should point to the varname */
    if((stat=nc_inq_varid(vid.grpid,segment,&vid.varid))) goto done;
done:
    free(fqn);
    if(stat == NC_NOERR && idp != NULL) *idp = vid;
    return stat;
}
#endif
/* Add filter support to NCZarr Filter support has three goals: 1. Use the existing HDF5 filter implementations, 2. Allow filter metadata to be stored in the NumCodecs metadata format used by Zarr, 3. Allow filters to be used even when HDF5 is disabled Detailed usage directions are defined in docs/filters.md. For now, the existing filter API is left in place. So filters are defined using ''nc_def_var_filter'' using the HDF5 style where the id and parameters are unsigned integers. This is a big change since filters affect many parts of the code. In the following, the terms "compressor" and "filter" and "codec" are generally used synonymously. ### Filter-Related Changes: * In order to support dynamic loading of shared filter libraries, a new library was added in the libncpoco directory; it helps to isolate dynamic loading across multiple platforms. * Provide a json parsing library for use by plugins; this is created by merging libdispatch/ncjson.c with include/ncjson.h. * Add a new _Codecs attribute to allow clients to see what codecs are being used; let ncdump -s print it out. * Provide special headers to help support compilation of HDF5 filters when HDF5 is not enabled: netcdf_filter_hdf5_build.h and netcdf_filter_build.h. * Add a number of new tests to test the new nczarr filters. * Let ncgen parse _Codecs attribute, although it is ignored. ### Plugin directory changes: * Add support for the Blosc compressor; this is essential because it is the most common compressor used in Zarr datasets. This also necessitated adding a CMake FindBlosc.cmake file * Add NCZarr support for the big-four filters provided by HDF5: shuffle, fletcher32, deflate (zlib), and szip * Add a Codec defaulter (see docs/filters.md) for the big four filters. * Make plugins work with windows by properly adding __declspec declaration. ### Misc. Non-Filter Changes * Replace most uses of USE_NETCDF4 (deprecated) with USE_HDF5. * Improve support for caching * More fixes for path conversion code * Fix misc.
   memory leaks * Add new utility -- ncdump/ncpathcvt -- that does more or less the same thing as cygpath. * Add a number of new tests to test the non-filter fixes. * Update the parsers * Convert most instances of '#ifdef _MSC_VER' to '#ifdef _WIN32'
   2021-09-03 07:04:26 +08:00 */
/*********************************************************************************/
// Read the value of an attribute as a single malloced, NUL-terminated C string.
//
//   ncid, varid - identify the object that carries the attribute
//   att         - attribute descriptor; this function reads its name, type, len
//                 and tinfo->size fields
//   str_out     - receives the malloced string; the caller frees it
//
// NC_CHAR attributes are returned whole.  For NC_STRING attributes only the
// first string is returned; an attribute with no elements, or whose first
// element is NULL, yields an empty string.  Any other attribute type prints a
// diagnostic to stderr and exits with status 2.
void nc_get_att_single_string(const int ncid, const int varid,
                              const struct ncatt_t *att, char **str_out) {
    if (att->type == NC_CHAR) {
        // NC_CHAR type attribute
        // nc_get_att_text writes the attribute value into caller-allocated
        // memory.  The number of bytes to allocate is the attribute length
        // (att->len) times the element size (att->tinfo->size), plus one
        // extra element for the terminator written at index att->len.
        *str_out = emalloc((att->len + 1) * att->tinfo->size);
        (*str_out)[att->len] = '\0';
        NC_CHECK(nc_get_att_text(ncid, varid, att->name, *str_out));
    } else if (att->type == NC_STRING) {
        // NC_STRING type attribute
        // nc_get_att_string fills a caller-allocated vector of char pointers,
        // one per string in the attribute.
        char **att_strings = emalloc((att->len + 1) * att->tinfo->size);
        const char *first = "";
        size_t att_str_len;

        NC_CHECK(nc_get_att_string(ncid, varid, att->name, att_strings));
        // Slot 0 is only meaningful when the attribute has at least one
        // element, and even then the entry may be NULL.
        if (att->len > 0 && att_strings[0] != NULL)
            first = att_strings[0];
        att_str_len = strlen(first);
        *str_out = emalloc(att_str_len + 1);
        memcpy(*str_out, first, att_str_len);
        (*str_out)[att_str_len] = '\0';
        nc_free_string(att->len, att_strings); /* Warning: does not free att_strings */
        free(att_strings);
    } else {
        fprintf(stderr,"nc_get_att_single_string: unknown attribute type: %d\n", att->type);
        fprintf(stderr,"    must use one of: NC_CHAR, NC_STRING\n");
        fflush(stderr); fflush(stdout);
        exit(2);
    }
}