netcdf-c/ncdump/chunkspec.c
Dennis Heimbigner f376c23329 Make utilities support NC_COMPACT
re: https://github.com/Unidata/netcdf-c/issues/1642

Modify ncdump, nccopy, and ncgen to support the NC_COMPACT storage option.
Added test cases and added description to the man pages for the utilities.

1. ncdump: For compact storage variable, print special attribute __Storage_ as
````
    <var>: _Storage = "compact";
````

2. ncgen: parse and implement
````
    <var>: _Storage = "compact";
````
in a .cdl file

3. nccopy: Extend the chunk specification (-c flag) to support
   compact using the forms
````
nccopy ... -c <var>:compact
and
nccopy ... -c <var>:contiguous
````

Misc. other changes
1. cleanup the copy_chunking function in ncdump/nccopy.c
2020-02-29 12:06:21 -07:00

396 lines
11 KiB
C

/*********************************************************************
* Copyright 2018, UCAR/Unidata
* See netcdf/COPYRIGHT file for copying and redistribution conditions.
* $Id $
*********************************************************************/
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include "netcdf.h"
#include "list.h"
#include "utils.h"
#include "chunkspec.h"
/* Structure mapping dimension IDs to corresponding chunksizes. */
static struct DimChunkSpecs {
size_t ndims; /* number of dimensions in chunkspec string */
int *idimids; /* (input) ids for dimensions in chunkspec string */
size_t *chunksizes; /* corresponding chunk sizes */
bool_t omit; /* true if chunking to be turned off */
} dimchunkspecs;
struct VarChunkSpec {
size_t rank; /* number of dimensions in chunkspec string */
size_t chunksizes[NC_MAX_VAR_DIMS]; /* corresponding chunk sizes */
bool_t omit; /* true if chunking to be turned off */
int kind;
int igrpid; /* container of the (input) variable */
int ivarid; /* (input) Variable whose chunks are specified */
};
static List* varchunkspecs = NULL; /* List<VarChunkSpec> */
/* Forward */
static int dimchunkspec_parse(int ncid, const char *spec);
static int varchunkspec_parse(int ncid, const char *spec);
void
chunkspecinit(void)
{
/* initialization */
if(varchunkspecs == NULL)
varchunkspecs = listnew();
memset(&dimchunkspecs,0,sizeof(dimchunkspecs));
}
/*
* Parse chunkspec string of either kind.
* Returns NC_NOERR if no error, NC_EINVAL if spec was malformed.
*/
int
chunkspec_parse(int igrp, const char *spec)
{
/* Decide if this is a per-variable or per-dimension chunkspec */
if (!spec || *spec == '\0')
return NC_NOERR; /* Use defaults */
if(strchr(spec,':') == NULL)
return dimchunkspec_parse(igrp,spec);
else
return varchunkspec_parse(igrp,spec);
}
/*
* Parse chunkspec string and convert into dimchunkspec structure.
* ncid: location ID of open netCDF file or group in an open file
* spec: string of form
* dim1/n1,dim2/n2,...,dimk/nk
* specifying chunk size (ni) to be used for dimension named
* dimi. Dimension names may be absolute,
* e.g. "/grp_a/grp_a1/dim". The "ni" part of the spec may be
* omitted, in which case it is assumed to be the entire
* dimension size. That is also the default for dimensions
* not mentioned in the string. However, for unlimited dimensions,
* the default is a default size: 4 megabytes or the
* existing unlimited size if smaller.
* If the chunkspec string is "/", specifying no dimensions or
* chunk sizes, it indicates chunking to be turned off on output.
*
* Returns NC_NOERR if no error, NC_EINVAL if spec has consecutive
* unescaped commas or no chunksize specified for dimension.
*/
static int
dimchunkspec_parse(int igrp, const char *spec)
{
const char *cp; /* character cursor */
const char *pp = spec; /* previous char cursor for detecting escapes */
const char *np; /* beginning of current dimension name */
size_t ndims = 0;
int idim;
int ret = NC_NOERR;
int comma_seen = 0;
dimchunkspecs.ndims = 0;
dimchunkspecs.omit = false;
if (!spec || *spec == '\0') /* default chunking */
goto done;
/* Special rule: // is treated as equivalent to / */
if ((spec[0] == '/' && spec[1] == '\0')
|| (spec[0] == '/' && spec[1] == '/' && spec[2] == '\0')) { /* no chunking */
dimchunkspecs.omit = true;
goto done;
}
/* Count unescaped commas, handle consecutive unescaped commas as error */
for(cp = spec; *cp; cp++) {
if(*cp == ',' && *pp != '\\') {
if(comma_seen) { /* consecutive commas detected */
{ret = NC_EINVAL; goto done;}
}
comma_seen = 1;
ndims++;
} else {
comma_seen = 0;
}
pp = cp;
}
ndims++;
dimchunkspecs.ndims = ndims;
dimchunkspecs.idimids = (int *) emalloc(ndims * sizeof(int));
dimchunkspecs.chunksizes = (size_t *) emalloc(ndims * sizeof(size_t));
/* Look up dimension ids and assign chunksizes */
pp = spec;
np = spec;
idim = 0;
for(cp = spec; ; cp++) {
if(*cp == '\0' || (*cp == ',' && *pp != '\\')) { /* found end of "dim/nn" part */
char* dimname = 0;
char *dp;
int dimid;
size_t chunksize;
for(; pp > np && *pp != '/'; pp--) { /* look backwards for "/" */
continue;
}
if(*pp != '/') { /* no '/' found, no chunksize specified for dimension */
ret = NC_EINVAL;
goto done;
}
/* extract dimension name */
dimname = (char *) emalloc(pp - np + 1);
dp = dimname;
while(np < pp) {
*dp++ = *np++;
}
*dp = '\0';
/* look up dimension id from dimension pathname */
ret = nc_inq_dimid2(igrp, dimname, &dimid);
if(ret != NC_NOERR)
{if(dimname) free(dimname); goto done;}
dimchunkspecs.idimids[idim] = dimid;
/* parse and assign corresponding chunksize */
pp++; /* now points to first digit of chunksize, ',', or '\0' */
if(*pp == ',' || *pp == '\0') { /* no size specified, use dim len */
size_t dimlen;
ret = nc_inq_dimlen(igrp, dimid, &dimlen);
if(ret != NC_NOERR)
{if(dimname) free(dimname); goto done;}
chunksize = dimlen;
} else { /* convert nnn string to long long integer */
char *ep;
#ifdef HAVE_STRTOLL
long long val = strtoll(pp, &ep, 0);
#else
long long val = strtol(pp, &ep, 0);
#endif
if(ep == pp || errno == ERANGE || val < 1) /* allow chunksize bigger than dimlen */
{if(dimname) free(dimname); ret = NC_EINVAL; goto done;}
chunksize = (size_t)val;
}
dimchunkspecs.chunksizes[idim] = chunksize;
idim++;
if(dimname) free(dimname);
dimname = NULL;
if(*cp == '\0')
break;
/* set np to point to first char after comma */
np = cp + 1;
}
pp = cp;
};
done:
return ret;
}
/* Return size in chunkspec string specified for dimension corresponding to dimid, 0 if not found */
size_t
dimchunkspec_size(int indimid) {
int idim;
for(idim = 0; idim < dimchunkspecs.ndims; idim++) {
if(indimid == dimchunkspecs.idimids[idim]) {
return dimchunkspecs.chunksizes[idim];
}
}
return 0;
}
/* Return number of dimensions for which chunking was specified in
* chunkspec string on command line, 0 if no chunkspec string was
* specified. */
int
dimchunkspec_ndims(void) {
return dimchunkspecs.ndims;
}
/* Return whether chunking should be omitted, due to explicit
* command-line specification. */
bool_t
dimchunkspec_omit(void) {
return dimchunkspecs.omit;
}
/* Return whether chunking should be omitted, due to explicit
* command-line specification. */
bool_t
dimchunkspec_exists(int indimid) {
int idim;
for(idim = 0; idim < dimchunkspecs.ndims; idim++) {
if(indimid == dimchunkspecs.idimids[idim]) {
return 1;
}
}
return 0;
}
/*
* Parse per-variable chunkspec string and convert into varchunkspec structure.
* ncid: location ID of open netCDF file or group in an open file
* spec: string of form
* var:n1,n2,...nk
*
* specifying chunk size (ni) to be used for ith dimension of
* variable named var. Variable names may be absolute.
* e.g. "/grp_a/grp_a1/var".
* If no chunk sizes are specified, then the variable is not chunked at all.
*
* Returns NC_NOERR if no error, NC_EINVAL if spec has consecutive
* unescaped commas or no chunksize specified for dimension.
*/
static int
varchunkspec_parse(int igrp, const char *spec0)
{
int ret = NC_NOERR;
int rank;
int i;
int dimids[NC_MAX_VAR_DIMS];
struct VarChunkSpec* chunkspec = NULL;
char* spec = NULL;
char* p, *q; /* for walking strings */
/* Copy spec so we can modify in place */
spec = strdup(spec0);
if(spec == NULL) {ret = NC_ENOMEM; goto done;}
chunkspec = calloc(1,sizeof(struct VarChunkSpec));
if(chunkspec == NULL) {ret = NC_ENOMEM; goto done;}
chunkspec->igrpid = igrp;
/* First, find the end of the variable part */
p = strchr(spec,':');
if(p == NULL)
{ret = NC_EINVAL; goto done;}
*p++ = '\0';
/* Lookup the variable by name */
ret = nc_inq_varid2(igrp, spec, &chunkspec->ivarid, &chunkspec->igrpid);
if(ret != NC_NOERR) goto done;
if(*p == '\0') {/* we have -c var: => do not chunk var */
chunkspec->omit = 1;
/* add the chunkspec to our list */
listpush(varchunkspecs,chunkspec);
chunkspec = NULL;
goto done;
}
/* See if the remainder matches 'compact' or 'contiguous' */
if(strcasecmp(p,"compact")==0) {
chunkspec->kind = NC_COMPACT;
goto notchunked;
} if(strcasecmp(p,"contiguous")==0) {
chunkspec->kind = NC_CONTIGUOUS;
goto notchunked;
} else
chunkspec->kind = NC_CHUNKED;
/* Iterate over dimension sizes */
while(*p) {
unsigned long dimsize;
q = strchr(p,',');
if(q == NULL)
q = p + strlen(p); /* Fake the endpoint */
else
*q++ = '\0';
/* Scan as unsigned long */
if(sscanf(p,"%lu",&dimsize) != 1)
{ret = NC_EINVAL; goto done;} /* Apparently not a valid dimension size */
if(chunkspec->rank >= NC_MAX_VAR_DIMS) {ret = NC_EINVAL; goto done;} /* to many chunks */
chunkspec->chunksizes[chunkspec->rank] = (size_t)dimsize;
chunkspec->rank++;
p = q;
}
/* Now do some validity checking */
/* Get some info about the var (from input) */
ret = nc_inq_var(chunkspec->igrpid,chunkspec->ivarid,NULL,NULL,&rank,dimids,NULL);
if(ret != NC_NOERR) goto done;
/* 1. check # chunksizes == rank of variable */
if(rank != chunkspec->rank) {ret = NC_EINVAL; goto done;}
/* 2. check that chunksizes are legal for the given dimension sizes */
for(i=0;i<rank;i++) {
size_t len;
ret = nc_inq_dimlen(igrp,dimids[i],&len);
if(ret != NC_NOERR) goto done;
if(chunkspec->chunksizes[i] > len) {ret = NC_EBADCHUNK; goto done;}
}
notchunked:
/* add the chunkspec to our list */
listpush(varchunkspecs,chunkspec);
chunkspec = NULL;
done:
if(chunkspec != NULL)
free(chunkspec);
if(spec != NULL)
free(spec);
return ret;
}
/* Accessors */
/* Return NC_CHUNKED || NC_CONTIGUOUS || NC_COMPACT */
int
varchunkspec_kind(int grpid, int varid)
{
int i;
for(i=0;i<listlength(varchunkspecs);i++) {
struct VarChunkSpec* spec = listget(varchunkspecs,i);
if(spec->igrpid == grpid && spec->ivarid == varid)
return spec->kind;
}
return NC_CONTIGUOUS; /* default */
}
bool_t
varchunkspec_exists(int igrpid, int ivarid)
{
int i;
for(i=0;i<listlength(varchunkspecs);i++) {
struct VarChunkSpec* spec = listget(varchunkspecs,i);
if(spec->igrpid == igrpid && spec->ivarid == ivarid)
return true;
}
return false;
}
bool_t
varchunkspec_omit(int igrpid, int ivarid)
{
int i;
for(i=0;i<listlength(varchunkspecs);i++) {
struct VarChunkSpec* spec = listget(varchunkspecs,i);
if(spec->igrpid == igrpid && spec->ivarid == ivarid)
return spec->omit;
}
return dimchunkspecs.omit;
}
size_t*
varchunkspec_chunksizes(int igrpid, int ivarid)
{
int i;
for(i=0;i<listlength(varchunkspecs);i++) {
struct VarChunkSpec* spec = listget(varchunkspecs,i);
if(spec->igrpid == igrpid && spec->ivarid == ivarid)
return spec->chunksizes;
}
return NULL;
}
size_t
varchunkspec_rank(int igrpid, int ivarid)
{
int i;
for(i=0;i<listlength(varchunkspecs);i++) {
struct VarChunkSpec* spec = listget(varchunkspecs,i);
if(spec->igrpid == igrpid && spec->ivarid == ivarid)
return spec->rank;
}
return 0;
}