mirror of
https://github.com/Unidata/netcdf-c.git
synced 2025-01-06 15:34:44 +08:00
f376c23329
re: https://github.com/Unidata/netcdf-c/issues/1642 Modify ncdump, nccopy, and ncgen to support the NC_COMPACT storage option. Added test cases and added description to the man pages for the utilities. 1. ncdump: For compact storage variable, print special attribute __Storage_ as ```` <var>: _Storage = "compact"; ```` 2. ncgen: parse and implement ```` <var>: _Storage = "compact"; ```` in a .cdl file 3. nccopy: Extend the chunk specification (-c flag) to support compact using the forms ```` nccopy ... -c <var>:compact and nccopy ... -c <var>:contiguous ```` Misc. other changes 1. cleanup the copy_chunking function in ncdump/nccopy.c
396 lines
11 KiB
C
396 lines
11 KiB
C
/*********************************************************************
|
|
* Copyright 2018, UCAR/Unidata
|
|
* See netcdf/COPYRIGHT file for copying and redistribution conditions.
|
|
* $Id $
|
|
*********************************************************************/
|
|
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <stdio.h>
|
|
#include "netcdf.h"
|
|
#include "list.h"
|
|
#include "utils.h"
|
|
#include "chunkspec.h"
|
|
|
|
/* Structure mapping dimension IDs to corresponding chunksizes. */
|
|
static struct DimChunkSpecs {
|
|
size_t ndims; /* number of dimensions in chunkspec string */
|
|
int *idimids; /* (input) ids for dimensions in chunkspec string */
|
|
size_t *chunksizes; /* corresponding chunk sizes */
|
|
bool_t omit; /* true if chunking to be turned off */
|
|
} dimchunkspecs;
|
|
|
|
struct VarChunkSpec {
|
|
size_t rank; /* number of dimensions in chunkspec string */
|
|
size_t chunksizes[NC_MAX_VAR_DIMS]; /* corresponding chunk sizes */
|
|
bool_t omit; /* true if chunking to be turned off */
|
|
int kind;
|
|
int igrpid; /* container of the (input) variable */
|
|
int ivarid; /* (input) Variable whose chunks are specified */
|
|
};
|
|
|
|
/* All per-variable chunk specifications parsed so far: List<struct VarChunkSpec*> */
static List *varchunkspecs = NULL;

/* Parsers defined further down in this file */
static int dimchunkspec_parse(int igrp, const char *spec);
static int varchunkspec_parse(int igrp, const char *spec0);
|
|
|
|
void
|
|
chunkspecinit(void)
|
|
{
|
|
/* initialization */
|
|
if(varchunkspecs == NULL)
|
|
varchunkspecs = listnew();
|
|
memset(&dimchunkspecs,0,sizeof(dimchunkspecs));
|
|
}
|
|
|
|
/*
|
|
* Parse chunkspec string of either kind.
|
|
* Returns NC_NOERR if no error, NC_EINVAL if spec was malformed.
|
|
*/
|
|
int
|
|
chunkspec_parse(int igrp, const char *spec)
|
|
{
|
|
/* Decide if this is a per-variable or per-dimension chunkspec */
|
|
if (!spec || *spec == '\0')
|
|
return NC_NOERR; /* Use defaults */
|
|
if(strchr(spec,':') == NULL)
|
|
return dimchunkspec_parse(igrp,spec);
|
|
else
|
|
return varchunkspec_parse(igrp,spec);
|
|
}
|
|
|
|
/*
|
|
* Parse chunkspec string and convert into dimchunkspec structure.
|
|
* ncid: location ID of open netCDF file or group in an open file
|
|
* spec: string of form
|
|
* dim1/n1,dim2/n2,...,dimk/nk
|
|
* specifying chunk size (ni) to be used for dimension named
|
|
* dimi. Dimension names may be absolute,
|
|
* e.g. "/grp_a/grp_a1/dim". The "ni" part of the spec may be
|
|
* omitted, in which case it is assumed to be the entire
|
|
* dimension size. That is also the default for dimensions
|
|
* not mentioned in the string. However, for unlimited dimensions,
|
|
* the default is a default size: 4 megabytes or the
|
|
* existing unlimited size if smaller.
|
|
* If the chunkspec string is "/", specifying no dimensions or
|
|
* chunk sizes, it indicates chunking to be turned off on output.
|
|
*
|
|
* Returns NC_NOERR if no error, NC_EINVAL if spec has consecutive
|
|
* unescaped commas or no chunksize specified for dimension.
|
|
*/
|
|
static int
|
|
dimchunkspec_parse(int igrp, const char *spec)
|
|
{
|
|
const char *cp; /* character cursor */
|
|
const char *pp = spec; /* previous char cursor for detecting escapes */
|
|
const char *np; /* beginning of current dimension name */
|
|
size_t ndims = 0;
|
|
int idim;
|
|
int ret = NC_NOERR;
|
|
int comma_seen = 0;
|
|
|
|
dimchunkspecs.ndims = 0;
|
|
dimchunkspecs.omit = false;
|
|
if (!spec || *spec == '\0') /* default chunking */
|
|
goto done;
|
|
/* Special rule: // is treated as equivalent to / */
|
|
if ((spec[0] == '/' && spec[1] == '\0')
|
|
|| (spec[0] == '/' && spec[1] == '/' && spec[2] == '\0')) { /* no chunking */
|
|
dimchunkspecs.omit = true;
|
|
goto done;
|
|
}
|
|
/* Count unescaped commas, handle consecutive unescaped commas as error */
|
|
for(cp = spec; *cp; cp++) {
|
|
if(*cp == ',' && *pp != '\\') {
|
|
if(comma_seen) { /* consecutive commas detected */
|
|
{ret = NC_EINVAL; goto done;}
|
|
}
|
|
comma_seen = 1;
|
|
ndims++;
|
|
} else {
|
|
comma_seen = 0;
|
|
}
|
|
pp = cp;
|
|
}
|
|
ndims++;
|
|
dimchunkspecs.ndims = ndims;
|
|
dimchunkspecs.idimids = (int *) emalloc(ndims * sizeof(int));
|
|
dimchunkspecs.chunksizes = (size_t *) emalloc(ndims * sizeof(size_t));
|
|
/* Look up dimension ids and assign chunksizes */
|
|
pp = spec;
|
|
np = spec;
|
|
idim = 0;
|
|
for(cp = spec; ; cp++) {
|
|
if(*cp == '\0' || (*cp == ',' && *pp != '\\')) { /* found end of "dim/nn" part */
|
|
char* dimname = 0;
|
|
char *dp;
|
|
int dimid;
|
|
size_t chunksize;
|
|
|
|
for(; pp > np && *pp != '/'; pp--) { /* look backwards for "/" */
|
|
continue;
|
|
}
|
|
if(*pp != '/') { /* no '/' found, no chunksize specified for dimension */
|
|
ret = NC_EINVAL;
|
|
goto done;
|
|
}
|
|
/* extract dimension name */
|
|
dimname = (char *) emalloc(pp - np + 1);
|
|
dp = dimname;
|
|
while(np < pp) {
|
|
*dp++ = *np++;
|
|
}
|
|
*dp = '\0';
|
|
/* look up dimension id from dimension pathname */
|
|
ret = nc_inq_dimid2(igrp, dimname, &dimid);
|
|
if(ret != NC_NOERR)
|
|
{if(dimname) free(dimname); goto done;}
|
|
dimchunkspecs.idimids[idim] = dimid;
|
|
/* parse and assign corresponding chunksize */
|
|
pp++; /* now points to first digit of chunksize, ',', or '\0' */
|
|
if(*pp == ',' || *pp == '\0') { /* no size specified, use dim len */
|
|
size_t dimlen;
|
|
ret = nc_inq_dimlen(igrp, dimid, &dimlen);
|
|
if(ret != NC_NOERR)
|
|
{if(dimname) free(dimname); goto done;}
|
|
chunksize = dimlen;
|
|
} else { /* convert nnn string to long long integer */
|
|
char *ep;
|
|
#ifdef HAVE_STRTOLL
|
|
long long val = strtoll(pp, &ep, 0);
|
|
#else
|
|
long long val = strtol(pp, &ep, 0);
|
|
#endif
|
|
if(ep == pp || errno == ERANGE || val < 1) /* allow chunksize bigger than dimlen */
|
|
{if(dimname) free(dimname); ret = NC_EINVAL; goto done;}
|
|
chunksize = (size_t)val;
|
|
}
|
|
dimchunkspecs.chunksizes[idim] = chunksize;
|
|
idim++;
|
|
if(dimname) free(dimname);
|
|
dimname = NULL;
|
|
if(*cp == '\0')
|
|
break;
|
|
/* set np to point to first char after comma */
|
|
np = cp + 1;
|
|
}
|
|
pp = cp;
|
|
};
|
|
done:
|
|
return ret;
|
|
}
|
|
|
|
/* Return size in chunkspec string specified for dimension corresponding to dimid, 0 if not found */
|
|
size_t
|
|
dimchunkspec_size(int indimid) {
|
|
int idim;
|
|
for(idim = 0; idim < dimchunkspecs.ndims; idim++) {
|
|
if(indimid == dimchunkspecs.idimids[idim]) {
|
|
return dimchunkspecs.chunksizes[idim];
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
/* Return number of dimensions for which chunking was specified in
|
|
* chunkspec string on command line, 0 if no chunkspec string was
|
|
* specified. */
|
|
int
|
|
dimchunkspec_ndims(void) {
|
|
return dimchunkspecs.ndims;
|
|
}
|
|
|
|
/* Return whether chunking should be omitted, due to explicit
|
|
* command-line specification. */
|
|
bool_t
|
|
dimchunkspec_omit(void) {
|
|
return dimchunkspecs.omit;
|
|
}
|
|
|
|
|
|
/* Return whether chunking should be omitted, due to explicit
|
|
* command-line specification. */
|
|
bool_t
|
|
dimchunkspec_exists(int indimid) {
|
|
int idim;
|
|
for(idim = 0; idim < dimchunkspecs.ndims; idim++) {
|
|
if(indimid == dimchunkspecs.idimids[idim]) {
|
|
return 1;
|
|
}
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
|
|
/*
|
|
* Parse per-variable chunkspec string and convert into varchunkspec structure.
|
|
* ncid: location ID of open netCDF file or group in an open file
|
|
* spec: string of form
|
|
* var:n1,n2,...nk
|
|
*
|
|
* specifying chunk size (ni) to be used for ith dimension of
|
|
* variable named var. Variable names may be absolute.
|
|
* e.g. "/grp_a/grp_a1/var".
|
|
* If no chunk sizes are specified, then the variable is not chunked at all.
|
|
*
|
|
* Returns NC_NOERR if no error, NC_EINVAL if spec has consecutive
|
|
* unescaped commas or no chunksize specified for dimension.
|
|
*/
|
|
static int
|
|
varchunkspec_parse(int igrp, const char *spec0)
|
|
{
|
|
int ret = NC_NOERR;
|
|
int rank;
|
|
int i;
|
|
int dimids[NC_MAX_VAR_DIMS];
|
|
struct VarChunkSpec* chunkspec = NULL;
|
|
char* spec = NULL;
|
|
char* p, *q; /* for walking strings */
|
|
|
|
/* Copy spec so we can modify in place */
|
|
spec = strdup(spec0);
|
|
if(spec == NULL) {ret = NC_ENOMEM; goto done;}
|
|
|
|
chunkspec = calloc(1,sizeof(struct VarChunkSpec));
|
|
if(chunkspec == NULL) {ret = NC_ENOMEM; goto done;}
|
|
|
|
chunkspec->igrpid = igrp;
|
|
|
|
/* First, find the end of the variable part */
|
|
p = strchr(spec,':');
|
|
if(p == NULL)
|
|
{ret = NC_EINVAL; goto done;}
|
|
*p++ = '\0';
|
|
|
|
/* Lookup the variable by name */
|
|
ret = nc_inq_varid2(igrp, spec, &chunkspec->ivarid, &chunkspec->igrpid);
|
|
if(ret != NC_NOERR) goto done;
|
|
|
|
if(*p == '\0') {/* we have -c var: => do not chunk var */
|
|
chunkspec->omit = 1;
|
|
/* add the chunkspec to our list */
|
|
listpush(varchunkspecs,chunkspec);
|
|
chunkspec = NULL;
|
|
goto done;
|
|
}
|
|
|
|
/* See if the remainder matches 'compact' or 'contiguous' */
|
|
if(strcasecmp(p,"compact")==0) {
|
|
chunkspec->kind = NC_COMPACT;
|
|
goto notchunked;
|
|
} if(strcasecmp(p,"contiguous")==0) {
|
|
chunkspec->kind = NC_CONTIGUOUS;
|
|
goto notchunked;
|
|
} else
|
|
chunkspec->kind = NC_CHUNKED;
|
|
|
|
/* Iterate over dimension sizes */
|
|
while(*p) {
|
|
unsigned long dimsize;
|
|
q = strchr(p,',');
|
|
if(q == NULL)
|
|
q = p + strlen(p); /* Fake the endpoint */
|
|
else
|
|
*q++ = '\0';
|
|
|
|
/* Scan as unsigned long */
|
|
if(sscanf(p,"%lu",&dimsize) != 1)
|
|
{ret = NC_EINVAL; goto done;} /* Apparently not a valid dimension size */
|
|
if(chunkspec->rank >= NC_MAX_VAR_DIMS) {ret = NC_EINVAL; goto done;} /* to many chunks */
|
|
chunkspec->chunksizes[chunkspec->rank] = (size_t)dimsize;
|
|
chunkspec->rank++;
|
|
p = q;
|
|
}
|
|
/* Now do some validity checking */
|
|
/* Get some info about the var (from input) */
|
|
ret = nc_inq_var(chunkspec->igrpid,chunkspec->ivarid,NULL,NULL,&rank,dimids,NULL);
|
|
if(ret != NC_NOERR) goto done;
|
|
|
|
/* 1. check # chunksizes == rank of variable */
|
|
if(rank != chunkspec->rank) {ret = NC_EINVAL; goto done;}
|
|
|
|
/* 2. check that chunksizes are legal for the given dimension sizes */
|
|
for(i=0;i<rank;i++) {
|
|
size_t len;
|
|
ret = nc_inq_dimlen(igrp,dimids[i],&len);
|
|
if(ret != NC_NOERR) goto done;
|
|
if(chunkspec->chunksizes[i] > len) {ret = NC_EBADCHUNK; goto done;}
|
|
}
|
|
|
|
notchunked:
|
|
/* add the chunkspec to our list */
|
|
listpush(varchunkspecs,chunkspec);
|
|
chunkspec = NULL;
|
|
|
|
done:
|
|
if(chunkspec != NULL)
|
|
free(chunkspec);
|
|
if(spec != NULL)
|
|
free(spec);
|
|
return ret;
|
|
}
|
|
|
|
/* Accessors */
|
|
|
|
/* Return NC_CHUNKED || NC_CONTIGUOUS || NC_COMPACT */
|
|
int
|
|
varchunkspec_kind(int grpid, int varid)
|
|
{
|
|
int i;
|
|
for(i=0;i<listlength(varchunkspecs);i++) {
|
|
struct VarChunkSpec* spec = listget(varchunkspecs,i);
|
|
if(spec->igrpid == grpid && spec->ivarid == varid)
|
|
return spec->kind;
|
|
}
|
|
return NC_CONTIGUOUS; /* default */
|
|
}
|
|
|
|
bool_t
|
|
varchunkspec_exists(int igrpid, int ivarid)
|
|
{
|
|
int i;
|
|
for(i=0;i<listlength(varchunkspecs);i++) {
|
|
struct VarChunkSpec* spec = listget(varchunkspecs,i);
|
|
if(spec->igrpid == igrpid && spec->ivarid == ivarid)
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
bool_t
|
|
varchunkspec_omit(int igrpid, int ivarid)
|
|
{
|
|
int i;
|
|
for(i=0;i<listlength(varchunkspecs);i++) {
|
|
struct VarChunkSpec* spec = listget(varchunkspecs,i);
|
|
if(spec->igrpid == igrpid && spec->ivarid == ivarid)
|
|
return spec->omit;
|
|
}
|
|
return dimchunkspecs.omit;
|
|
}
|
|
|
|
size_t*
|
|
varchunkspec_chunksizes(int igrpid, int ivarid)
|
|
{
|
|
int i;
|
|
for(i=0;i<listlength(varchunkspecs);i++) {
|
|
struct VarChunkSpec* spec = listget(varchunkspecs,i);
|
|
if(spec->igrpid == igrpid && spec->ivarid == ivarid)
|
|
return spec->chunksizes;
|
|
}
|
|
return NULL;
|
|
}
|
|
|
|
size_t
|
|
varchunkspec_rank(int igrpid, int ivarid)
|
|
{
|
|
int i;
|
|
for(i=0;i<listlength(varchunkspecs);i++) {
|
|
struct VarChunkSpec* spec = listget(varchunkspecs,i);
|
|
if(spec->igrpid == igrpid && spec->ivarid == ivarid)
|
|
return spec->rank;
|
|
}
|
|
return 0;
|
|
}
|