/********************************************************************* * Copyright 2010, University Corporation for Atmospheric Research * See netcdf/README file for copying and redistribution conditions. * Thanks to Philippe Poilbarbe and Antonio S. CofiƱo for * compression additions. * $Id: nccopy.c 400 2010-08-27 21:02:52Z russ $ *********************************************************************/ #include "config.h" /* for USE_NETCDF4 macro */ #include #include #ifdef HAVE_GETOPT_H #include #endif #ifdef HAVE_UNISTD_H #include #endif #include #include "netcdf.h" #include "nciter.h" #include "utils.h" #include "chunkspec.h" #include "dimmap.h" #include "nccomps.h" #include "ncfilter.h" #undef DEBUGFILTER #ifdef _MSC_VER #include "XGetopt.h" #define snprintf _snprintf int opterr; int optind; #endif /* default bytes of memory we are willing to allocate for variable * values during copy */ #define COPY_BUFFER_SIZE (5000000) #define COPY_CHUNKCACHE_PREEMPTION (1.0f) /* for copying, can eject fully read chunks */ #define SAME_AS_INPUT (-1) /* default, if kind not specified */ #define CHUNK_THRESHOLD (8192) /* non-record variables with fewer bytes don't get chunked */ #ifndef USE_NETCDF4 #define NC_CLASSIC_MODEL 0x0100 /* Enforce classic model if netCDF-4 not available. */ #endif /* Ascii characters requiring escaping as lead*/ #define ESCAPESD "0123456789" #define ESCAPES " !\"#$%&'()*,:;<=>?[]\\^`{|}~" #ifdef USE_NETCDF4 /* The unique id for a variable requires also the enclosing group id */ typedef struct VarID { int grpid; int varid; } VarID; #define MAX_FILTER_SPECS 64 #define MAX_FILTER_PARAMS 256 struct FilterSpec { char* fqn; int nofilter; /* 1=> do not apply any filters to this variable */ unsigned int filterid; size_t nparams; unsigned int* params; }; static int nfilterspecs = 0; /* Number of defined filter specs */ static struct FilterSpec filterspecs[MAX_FILTER_SPECS]; static int suppressfilters = 0; /* 1 => do not apply any output filters unless specified */ #endif /* Global variables for command-line requests */ char *progname; /* for error messages */ static int option_kind = SAME_AS_INPUT; static int option_deflate_level = -1; /* default, compress output only if input compressed */ static int option_shuffle_vars = NC_NOSHUFFLE; /* default, no shuffling on compression */ static int option_fix_unlimdims = 0; /* default, preserve unlimited dimensions */ static char* option_chunkspec = 0; /* default, no chunk specification */ static size_t option_copy_buffer_size = COPY_BUFFER_SIZE; static size_t option_chunk_cache_size = CHUNK_CACHE_SIZE; /* default from config.h */ static size_t option_chunk_cache_nelems = CHUNK_CACHE_NELEMS; /* default from config.h */ static int option_read_diskless = 0; /* default, don't read input into memory on open */ static int option_write_diskless = 0; /* default, don't write output to diskless file */ #ifdef USE_NETCDF4 static int option_min_chunk_bytes = CHUNK_THRESHOLD; /* default, don't chunk variable if prod of * chunksizes of its dimensions is smaller * than this */ #endif static int option_nlgrps = 0; /* Number of groups specified with -g * option on command line */ static char** option_lgrps = 0; /* list of group names specified with -g * option on command line */ static idnode_t* option_grpids = 0; /* list of grpids matching list specified with -g option */ static bool_t option_grpstruct = false; /* if -g set, copy structure for non-selected groups */ static int option_nlvars = 0; /* Number of variables specified with -v * option on command line */ static char** option_lvars = 0; /* list of variable names specified with -v * option on command line */ static bool_t option_varstruct = false; /* if -v set, copy structure for non-selected vars */ static int option_compute_chunkcaches = 0; /* default, don't try still flaky estimate of * chunk cache for each variable */ /* get group id in output corresponding to group igrp in input, * given parent group id (or root group id) parid in output. */ static int get_grpid(int igrp, int parid, int *ogrpp) { int stat = NC_NOERR; int ogid = parid; /* like igrp but in output file */ #ifdef USE_NETCDF4 int inparid; /* if not root group, get corresponding output groupid from group name */ stat = nc_inq_grp_parent(igrp, &inparid); if(stat == NC_NOERR) { /* not root group */ char grpname[NC_MAX_NAME + 1]; NC_CHECK(nc_inq_grpname(igrp, grpname)); NC_CHECK(nc_inq_grp_ncid(parid, grpname, &ogid)); } else if(stat == NC_ENOGRP) { /* root group */ stat = NC_NOERR; } else { NC_CHECK(stat); } #endif /* USE_NETCDF4 */ *ogrpp = ogid; return stat; } /* Return size in bytes of a variable value */ static size_t val_size(int grpid, int varid) { nc_type vartype; size_t value_size; NC_CHECK(nc_inq_vartype(grpid, varid, &vartype)); NC_CHECK(nc_inq_type(grpid, vartype, NULL, &value_size)); return value_size; } #ifdef USE_NETCDF4 /* Get parent id needed to define a new group from its full name in an * open file identified by ncid. Assumes all intermediate groups are * already defined. */ static int nc_inq_parid(int ncid, const char *fullname, int *locidp) { char *parent = strdup(fullname); char *slash = "/"; /* groupname separator */ char *last_slash; if(parent == NULL) { return NC_ENOMEM; /* exits */ } last_slash = strrchr(parent, '/'); if(last_slash == parent || last_slash == NULL) { /* parent is root */ free(parent); parent = strdup(slash); } else { *last_slash = '\0'; /* truncate to get parent name */ } NC_CHECK(nc_inq_grp_full_ncid(ncid, parent, locidp)); free(parent); return NC_NOERR; } /* Compute the fully qualified name of a (grpid,varid) pair; caller must free */ static int computeFQN(VarID vid, char** fqnp) { int stat = NC_NOERR; size_t len; char* fqn = NULL; char vname[NC_MAX_NAME+1]; char escname[(2*NC_MAX_NAME)+1]; int first; char *p, *q; if((stat = nc_inq_grpname_full(vid.grpid,&len,NULL))) goto done; fqn = (char*)malloc(len+1+(2*NC_MAX_NAME)+1); if(fqn == NULL) {stat = NC_ENOMEM; goto done;} if((stat=nc_inq_grpname_full(vid.grpid,&len,fqn))) goto done; fqn[len] = '\0'; /* guarantee */ if((stat=nc_inq_varname(vid.grpid,vid.varid,vname))) goto done; vname[NC_MAX_NAME] = '\0'; if(strlen(fqn) > 1) strcat(fqn,"/"); p = vname; q = escname; for(first=1;*p;first=0) { if((first && strchr(ESCAPESD,*p) != NULL) || strchr(ESCAPES,*p) != NULL) *q++ = '\\'; *q++ = *p++; } *q++ = '\0'; /* guarantee */ strcat(fqn,escname); done: if(stat == NC_NOERR && fqnp != NULL) *fqnp = fqn; return stat; } #if 0 static int parseFQN(int ncid, const char* fqn0, VarID* idp) { int stat = NC_NOERR; char* fqn; VarID vid; char* p; char* q; char* segment; vid.grpid = ncid; if(fqn0 == NULL || fqn0[1] != '/') {stat = NC_EBADNAME; goto done;} fqn = strdup(fqn0+1); /* skip leading '/'*/ p = fqn; for(;;) { int newgrp; segment = p; q = p; while(*p != '\0' && *p != '/') { if(*p == '\\') p++; *q++ = *p++; } if(*p == '\0') break; *p++ = '\0'; if((stat=nc_inq_grp_ncid(vid.grpid,segment,&newgrp))) goto done; vid.grpid = newgrp; } /* Segment should point to the varname */ if((stat=nc_inq_varid(vid.grpid,segment,&vid.varid))) goto done; done: if(fqn) free(fqn); if(stat == NC_NOERR && idp != NULL) *idp = vid; return stat; } #endif static int parsefilterspec(const char* optarg0, struct FilterSpec* spec) { int stat = NC_NOERR; char* optarg = NULL; unsigned int* params = NULL; size_t nparams; unsigned int id; char* p = NULL; char* remainder = NULL; if(optarg0 == NULL || strlen(optarg0) == 0 || spec == NULL) return 0; memset(spec,0,sizeof(struct FilterSpec)); optarg = strdup(optarg0); /* Collect the fqn, taking escapes into account */ p = optarg; remainder = NULL; for(;*p;p++) { if(*p == '\\') p++; else if(*p == ',') {*p = '\0'; remainder = p+1; break;} else if(*p == '\0') {remainder = p; break;} /* else continue */ } if(strlen(optarg) == 0) {stat = NC_EINVAL; goto done;} /* fqn does not exist */ /* Make sure leading '/' is in place */ if(optarg[0]=='/') spec->fqn = strdup(optarg); else { spec->fqn = (char*)malloc(1+strlen(optarg)+1); strcpy(spec->fqn,"/"); strcat(spec->fqn,optarg); } /* Check for special cases */ if(strcmp(remainder,"none") == 0) { spec->nofilter = 1; goto done; } /* Collect the id+parameters */ if((stat = NC_parsefilterspec(remainder,&id,&nparams,¶ms)) == NC_NOERR) { if(spec != NULL) { spec->filterid = id; spec->nparams = nparams; spec->params = params; } } done: if(optarg) free(optarg); return stat; } /* Return size of chunk in bytes for a variable varid in a group igrp, or 0 if * layout is contiguous */ static int inq_var_chunksize(int igrp, int varid, size_t* chunksizep) { int stat = NC_NOERR; int ndims; size_t *chunksizes; int dim; int contig = 1; nc_type vartype; size_t value_size; size_t prod; NC_CHECK(nc_inq_vartype(igrp, varid, &vartype)); /* from type, get size in memory needed for each value */ NC_CHECK(nc_inq_type(igrp, vartype, NULL, &value_size)); prod = value_size; NC_CHECK(nc_inq_varndims(igrp, varid, &ndims)); chunksizes = (size_t *) emalloc((ndims + 1) * sizeof(size_t)); if(ndims > 0) { NC_CHECK(nc_inq_var_chunking(igrp, varid, &contig, NULL)); } if(contig == 1) { *chunksizep = 0; } else { NC_CHECK(nc_inq_var_chunking(igrp, varid, &contig, chunksizes)); for(dim = 0; dim < ndims; dim++) { prod *= chunksizes[dim]; } *chunksizep = prod; } free(chunksizes); return stat; } /* Return estimated number of elems required in chunk cache and * estimated size of chunk cache adequate to efficiently copy input * variable ivarid to output variable ovarid, which may have different * chunk size and shape */ static int inq_var_chunking_params(int igrp, int ivarid, int ogrp, int ovarid, size_t* chunkcache_sizep, size_t *chunkcache_nelemsp, float * chunkcache_preemptionp) { int stat = NC_NOERR; int ndims; size_t *ichunksizes, *ochunksizes; int dim; int icontig = 1, ocontig = 1; nc_type vartype; size_t value_size; size_t prod, iprod, oprod; size_t nelems; *chunkcache_nelemsp = CHUNK_CACHE_NELEMS; *chunkcache_sizep = CHUNK_CACHE_SIZE; *chunkcache_preemptionp = COPY_CHUNKCACHE_PREEMPTION; NC_CHECK(nc_inq_varndims(igrp, ivarid, &ndims)); if(ndims > 0) { NC_CHECK(nc_inq_var_chunking(igrp, ivarid, &icontig, NULL)); NC_CHECK(nc_inq_var_chunking(ogrp, ovarid, &ocontig, NULL)); } if(icontig == 1 && ocontig == 1) { /* no chunking in input or output */ *chunkcache_nelemsp = 0; *chunkcache_sizep = 0; *chunkcache_preemptionp = 0; return stat; } NC_CHECK(nc_inq_vartype(igrp, ivarid, &vartype)); NC_CHECK(nc_inq_type(igrp, vartype, NULL, &value_size)); iprod = value_size; if(icontig == 0 && ocontig == 1) { /* chunking only in input */ *chunkcache_nelemsp = 1; /* read one input chunk at a time */ *chunkcache_sizep = iprod; *chunkcache_preemptionp = 1.0f; return stat; } ichunksizes = (size_t *) emalloc((ndims + 1) * sizeof(size_t)); if(icontig == 1) { /* if input contiguous, treat as if chunked on * first dimension */ ichunksizes[0] = 1; for(dim = 1; dim < ndims; dim++) { ichunksizes[dim] = dim; } } else { NC_CHECK(nc_inq_var_chunking(igrp, ivarid, &icontig, ichunksizes)); } /* now can assume chunking in both input and output */ ochunksizes = (size_t *) emalloc((ndims + 1) * sizeof(size_t)); NC_CHECK(nc_inq_var_chunking(ogrp, ovarid, &ocontig, ochunksizes)); nelems = 1; oprod = value_size; for(dim = 0; dim < ndims; dim++) { nelems += 1 + (ichunksizes[dim] - 1) / ochunksizes[dim]; iprod *= ichunksizes[dim]; oprod *= ochunksizes[dim]; } prod = iprod + oprod * (nelems - 1); *chunkcache_nelemsp = nelems; *chunkcache_sizep = prod; free(ichunksizes); free(ochunksizes); return stat; } /* Forward declaration, because copy_type, copy_vlen_type call each other */ static int copy_type(int igrp, nc_type typeid, int ogrp); /* * copy a user-defined variable length type in the group igrp to the * group ogrp */ static int copy_vlen_type(int igrp, nc_type itype, int ogrp) { int stat = NC_NOERR; nc_type ibasetype; nc_type obasetype; /* base type in target group */ char name[NC_MAX_NAME]; size_t size; char basename[NC_MAX_NAME]; size_t basesize; nc_type vlen_type; NC_CHECK(nc_inq_vlen(igrp, itype, name, &size, &ibasetype)); /* to get base type id in target group, use name of base type in * source group */ NC_CHECK(nc_inq_type(igrp, ibasetype, basename, &basesize)); stat = nc_inq_typeid(ogrp, basename, &obasetype); /* if no such type, create it now */ if(stat == NC_EBADTYPE) { NC_CHECK(copy_type(igrp, ibasetype, ogrp)); stat = nc_inq_typeid(ogrp, basename, &obasetype); } NC_CHECK(stat); /* Now we know base type exists in output and we know its type id */ NC_CHECK(nc_def_vlen(ogrp, name, obasetype, &vlen_type)); return stat; } /* * copy a user-defined opaque type in the group igrp to the group ogrp */ static int copy_opaque_type(int igrp, nc_type itype, int ogrp) { int stat = NC_NOERR; nc_type otype; char name[NC_MAX_NAME]; size_t size; NC_CHECK(nc_inq_opaque(igrp, itype, name, &size)); NC_CHECK(nc_def_opaque(ogrp, size, name, &otype)); return stat; } /* * copy a user-defined enum type in the group igrp to the group ogrp */ static int copy_enum_type(int igrp, nc_type itype, int ogrp) { int stat = NC_NOERR; nc_type otype; nc_type basetype; size_t basesize; size_t nmembers; char name[NC_MAX_NAME]; int i; NC_CHECK(nc_inq_enum(igrp, itype, name, &basetype, &basesize, &nmembers)); NC_CHECK(nc_def_enum(ogrp, basetype, name, &otype)); for(i = 0; i < nmembers; i++) { /* insert enum members */ char ename[NC_MAX_NAME]; long long val; /* large enough to hold any integer type */ NC_CHECK(nc_inq_enum_member(igrp, itype, i, ename, &val)); NC_CHECK(nc_insert_enum(ogrp, otype, ename, &val)); } return stat; } /* * copy a user-defined compound type in the group igrp to the group ogrp */ static int copy_compound_type(int igrp, nc_type itype, int ogrp) { int stat = NC_NOERR; char name[NC_MAX_NAME]; size_t size; size_t nfields; nc_type otype; int fid; NC_CHECK(nc_inq_compound(igrp, itype, name, &size, &nfields)); NC_CHECK(nc_def_compound(ogrp, size, name, &otype)); for (fid = 0; fid < nfields; fid++) { char fname[NC_MAX_NAME]; char ftypename[NC_MAX_NAME]; size_t foff; nc_type iftype, oftype; int fndims; NC_CHECK(nc_inq_compound_field(igrp, itype, fid, fname, &foff, &iftype, &fndims, NULL)); /* type ids in source don't necessarily correspond to same * typeids in destination, so look up destination typeid by using * field type name */ NC_CHECK(nc_inq_type(igrp, iftype, ftypename, NULL)); NC_CHECK(nc_inq_typeid(ogrp, ftypename, &oftype)); if(fndims == 0) { NC_CHECK(nc_insert_compound(ogrp, otype, fname, foff, oftype)); } else { /* field is array type */ int *fdimsizes; fdimsizes = (int *) emalloc((fndims + 1) * sizeof(int)); stat = nc_inq_compound_field(igrp, itype, fid, NULL, NULL, NULL, NULL, fdimsizes); NC_CHECK(nc_insert_array_compound(ogrp, otype, fname, foff, oftype, fndims, fdimsizes)); free(fdimsizes); } } return stat; } /* * copy a user-defined type in the group igrp to the group ogrp */ static int copy_type(int igrp, nc_type typeid, int ogrp) { int stat = NC_NOERR; nc_type type_class; NC_CHECK(nc_inq_user_type(igrp, typeid, NULL, NULL, NULL, NULL, &type_class)); switch(type_class) { case NC_VLEN: NC_CHECK(copy_vlen_type(igrp, typeid, ogrp)); break; case NC_OPAQUE: NC_CHECK(copy_opaque_type(igrp, typeid, ogrp)); break; case NC_ENUM: NC_CHECK(copy_enum_type(igrp, typeid, ogrp)); break; case NC_COMPOUND: NC_CHECK(copy_compound_type(igrp, typeid, ogrp)); break; default: NC_CHECK(NC_EBADTYPE); } return stat; } /* Copy a group and all its subgroups, recursively, from iroot to * oroot, the ncids of input file and output file. This just creates * all the groups in the destination, but doesn't copy anything that's * in the groups yet. */ static int copy_groups(int iroot, int oroot) { int stat = NC_NOERR; int numgrps; int *grpids; int i; /* get total number of groups and their ids, including all descendants */ NC_CHECK(nc_inq_grps_full(iroot, &numgrps, NULL)); if(numgrps > 1) { /* there's always 1 root group */ grpids = emalloc(numgrps * sizeof(int)); NC_CHECK(nc_inq_grps_full(iroot, NULL, grpids)); /* create corresponding new groups in ogrp, except for root group */ for(i = 1; i < numgrps; i++) { char *grpname_full; char grpname[NC_MAX_NAME]; size_t len_name; int ogid = 0, oparid = 0, iparid = 0; /* get full group name of input group */ NC_CHECK(nc_inq_grpname(grpids[i], grpname)); if (option_grpstruct || group_wanted(grpids[i], option_nlgrps, option_grpids)) { NC_CHECK(nc_inq_grpname_full(grpids[i], &len_name, NULL)); grpname_full = emalloc(len_name + 1); NC_CHECK(nc_inq_grpname_full(grpids[i], &len_name, grpname_full)); /* Make sure, the parent group is also wanted (root group is always wanted) */ NC_CHECK(nc_inq_parid(iroot, grpname_full, &iparid)); if (!option_grpstruct && !group_wanted(iparid, option_nlgrps, option_grpids) && iparid != iroot) { error("ERROR: trying to copy a group but not the parent: %s", grpname_full); } /* get id of parent group of corresponding group in output. * Note that this exists, because nc_inq_groups returned * grpids in preorder, so parents are always copied before * their subgroups */ NC_CHECK(nc_inq_parid(oroot, grpname_full, &oparid)); NC_CHECK(nc_inq_grpname(grpids[i], grpname)); /* define corresponding group in output */ NC_CHECK(nc_def_grp(oparid, grpname, &ogid)); free(grpname_full); } } free(grpids); } return stat; } /* * Copy the user-defined types in this group (igrp) and all its * subgroups, recursively, to corresponding group in output (ogrp) */ static int copy_types(int igrp, int ogrp) { int stat = NC_NOERR; int ntypes; nc_type *types = NULL; int numgrps; int *grpids = NULL; int i; NC_CHECK(nc_inq_typeids(igrp, &ntypes, NULL)); if(ntypes > 0) { types = (nc_type *) emalloc(ntypes * sizeof(nc_type)); NC_CHECK(nc_inq_typeids(igrp, &ntypes, types)); for (i = 0; i < ntypes; i++) { NC_CHECK(copy_type(igrp, types[i], ogrp)); } free(types); } /* Copy types from subgroups */ NC_CHECK(nc_inq_grps(igrp, &numgrps, NULL)); if(numgrps > 0) { grpids = (int *)emalloc(sizeof(int) * numgrps); NC_CHECK(nc_inq_grps(igrp, &numgrps, grpids)); for(i = 0; i < numgrps; i++) { if (option_grpstruct || group_wanted(grpids[i], option_nlgrps, option_grpids)) { int ogid; /* get groupid in output corresponding to grpids[i] in * input, given parent group (or root group) ogrp in * output */ NC_CHECK(get_grpid(grpids[i], ogrp, &ogid)); NC_CHECK(copy_types(grpids[i], ogid)); } } free(grpids); } return stat; } /* Copy all netCDF-4 specific variable properties such as chunking, * endianness, deflation, checksumming, fill, etc. */ static int copy_var_specials(int igrp, int varid, int ogrp, int o_varid) { int stat = NC_NOERR; { /* handle chunking parameters */ int ndims; NC_CHECK(nc_inq_varndims(igrp, varid, &ndims)); if (ndims > 0) { /* no chunking for scalar variables */ int contig = 0; size_t *chunkp = (size_t *) emalloc(ndims * sizeof(size_t)); int *dimids = (int *) emalloc(ndims * sizeof(int)); int idim; /* size of a chunk: product of dimension chunksizes and size of value */ size_t csprod = val_size(ogrp, o_varid); int is_unlimited = 0; NC_CHECK(nc_inq_var_chunking(igrp, varid, &contig, chunkp)); NC_CHECK(nc_inq_vardimid(igrp, varid, dimids)); for(idim = 0; idim < ndims; idim++) { int idimid = dimids[idim]; int odimid = dimmap_odimid(idimid); size_t chunksize = chunkspec_size(idimid); if(chunksize > 0) { /* found in chunkspec */ chunkp[idim] = chunksize; } csprod *= chunkp[idim]; if(dimmap_ounlim(odimid)) is_unlimited = 1; } /* Explicitly set chunking, even if default */ /* If product of chunksizes is too small and no unlimited * dimensions used, don't chunk. Also if chunking * explicitly turned off with chunk spec, don't chunk. */ if ((csprod < option_min_chunk_bytes && !is_unlimited) || contig == 1 || chunkspec_omit() == true) { NC_CHECK(nc_def_var_chunking(ogrp, o_varid, NC_CONTIGUOUS, NULL)); } else { NC_CHECK(nc_def_var_chunking(ogrp, o_varid, NC_CHUNKED, chunkp)); } free(dimids); free(chunkp); } } { /* handle compression parameters, copying from input, overriding * with command-line options */ int shuffle_in=0, deflate_in=0, deflate_level_in=0; int shuffle_out=0, deflate_out=0, deflate_level_out=0; if(option_deflate_level != 0) { NC_CHECK(nc_inq_var_deflate(igrp, varid, &shuffle_in, &deflate_in, &deflate_level_in)); if(option_deflate_level == -1) { /* not specified, copy input compression and shuffling */ shuffle_out = shuffle_in; deflate_out = deflate_in; deflate_level_out = deflate_level_in; } else if(option_deflate_level > 0) { /* change to specified compression, shuffling */ shuffle_out = option_shuffle_vars; deflate_out=1; deflate_level_out = option_deflate_level; } NC_CHECK(nc_def_var_deflate(ogrp, o_varid, shuffle_out, deflate_out, deflate_level_out)); } } { /* handle checksum parameters */ int fletcher32 = 0; NC_CHECK(nc_inq_var_fletcher32(igrp, varid, &fletcher32)); if(fletcher32 != 0) { NC_CHECK(nc_def_var_fletcher32(ogrp, o_varid, fletcher32)); } } { /* handle endianness */ int endianness = 0; NC_CHECK(nc_inq_var_endian(igrp, varid, &endianness)); if(endianness != NC_ENDIAN_NATIVE) { /* native is the default */ NC_CHECK(nc_def_var_endian(ogrp, o_varid, endianness)); } } return stat; } /* Copy netCDF-4 specific variable filter properties */ /* Watch out if input is netcdf-3 */ static int copy_var_filter(int igrp, int varid, int ogrp, int o_varid) { int stat = NC_NOERR; VarID vid = {igrp,varid}; VarID ovid = {ogrp,o_varid}; /* handle filter parameters, copying from input, overriding with command-line options */ struct FilterSpec inspec, ospec, actualspec; int i; char* ofqn = NULL; int format, oformat; int inputdefined, outputdefined, unfiltered; /* Get file format of the input and output */ if((stat=nc_inq_format(vid.grpid,&format))) goto done; if((stat=nc_inq_format(ovid.grpid,&oformat))) goto done; if(oformat != NC_FORMAT_NETCDF4 && oformat != NC_FORMAT_NETCDF4_CLASSIC) goto done; /* Can only use filter when output is some netcdf4 variant */ /* Compute the output vid's FQN */ if((stat = computeFQN(ovid,&ofqn))) goto done; /* Clear the in and out specs */ memset(&inspec,0,sizeof(inspec)); memset(&ospec,0,sizeof(ospec)); memset(&actualspec,0,sizeof(actualspec)); /* Is there a filter on the output variable */ outputdefined = 0; /* default is no filter defined */ /* Only bother to look if output is netcdf-4 variant */ if(oformat == NC_FORMAT_NETCDF4 || oformat == NC_FORMAT_NETCDF4_CLASSIC) { /* See if any output filter spec is defined for this output variable */ for(i=0;i 0) { /* no chunking for scalar variables */ int chunked = 0; int *dimids = (int *) emalloc(ndims * sizeof(int)); size_t varsize; nc_type vartype; size_t value_size; int is_unlimited = 0; NC_CHECK(nc_inq_vardimid (ogrp, o_varid, dimids)); NC_CHECK(nc_inq_vartype(ogrp, o_varid, &vartype)); /* from type, get size in memory needed for each value */ NC_CHECK(nc_inq_type(ogrp, vartype, NULL, &value_size)); varsize = value_size; /* Determine if this variable should be chunked. A variable * should be chunked if any of its dims are in command-line * chunk spec. It will also be chunked if any of its * dims are unlimited. */ for(odim = 0; odim < ndims; odim++) { int odimid = dimids[odim]; int idimid = dimmap_idimid(odimid); /* corresponding dimid in input file */ if(dimmap_ounlim(odimid)) is_unlimited = 1; /* whether vriable is unlimited */ if(idimid != -1) { size_t chunksize = chunkspec_size(idimid); /* from chunkspec */ size_t dimlen; NC_CHECK(nc_inq_dimlen(ogrp, odimid, &dimlen)); if( (chunksize > 0) || dimmap_ounlim(odimid)) { chunked = 1; } if(dimlen > 0) { /* dimlen for unlimited dims is still 0 before copying data */ varsize *= dimlen; } } } /* Don't chunk small variables that don't use an unlimited * dimension. */ if(varsize < chunk_threshold && !is_unlimited) chunked = 0; if(chunked) { /* Allocate chunksizes and set defaults to dimsize for any * dimensions not mentioned in chunkspec, except use 1 for unlimited dims. */ size_t *chunkp = (size_t *) emalloc(ndims * sizeof(size_t)); for(odim = 0; odim < ndims; odim++) { int odimid = dimids[odim]; int idimid = dimmap_idimid(odimid); size_t chunksize = chunkspec_size(idimid); if(chunksize > 0) { chunkp[odim] = chunksize; } else { if(dimmap_ounlim(odimid)){ chunkp[odim] = 1; } else { NC_CHECK(nc_inq_dimlen(ogrp, odimid, &chunkp[odim])); } } } NC_CHECK(nc_def_var_chunking(ogrp, o_varid, NC_CHUNKED, chunkp)); free(chunkp); } free(dimids); } return stat; } /* Set variable to compression specified on command line */ static int set_var_compressed(int ogrp, int o_varid) { int stat = NC_NOERR; if (option_deflate_level > 0) { int deflate = 1; NC_CHECK(nc_def_var_deflate(ogrp, o_varid, option_shuffle_vars, deflate, option_deflate_level)); } return stat; } /* Release the variable chunk cache allocated for variable varid in * group grp. This is not necessary, but will save some memory when * processing one variable at a time. */ #ifdef UNUSED static int free_var_chunk_cache(int grp, int varid) { int stat = NC_NOERR; size_t chunk_cache_size = 1; size_t cache_nelems = 1; float cache_preemp = 0; int kind; NC_CHECK(nc_inq_format(grp, &kind)); if(kind == NC_FORMAT_NETCDF4 || kind == NC_FORMAT_NETCDF4_CLASSIC) { int contig = 1; NC_CHECK(nc_inq_var_chunking(grp, varid, &contig, NULL)); if(contig == 0) { /* chunked */ NC_CHECK(nc_set_var_chunk_cache(grp, varid, chunk_cache_size, cache_nelems, cache_preemp)); } } return stat; } #endif #endif /* USE_NETCDF4 */ /* Copy dimensions from group igrp to group ogrp, also associate input * dimids with output dimids (they need not match, because the input * dimensions may have been defined in a different order than we define * the output dimensions here. */ static int copy_dims(int igrp, int ogrp) { int stat = NC_NOERR; int ndims; int dgrp; #ifdef USE_NETCDF4 int nunlims; int *dimids; int *unlimids; #else int unlimid; #endif /* USE_NETCDF4 */ NC_CHECK(nc_inq_ndims(igrp, &ndims)); #ifdef USE_NETCDF4 /* In netCDF-4 files, dimids may not be sequential because they * may be defined in various groups, and we are only looking at one * group at a time. */ /* Find the dimension ids in this group, don't include parents. */ dimids = (int *) emalloc((ndims + 1) * sizeof(int)); NC_CHECK(nc_inq_dimids(igrp, NULL, dimids, 0)); /* Find the number of unlimited dimensions and get their IDs */ NC_CHECK(nc_inq_unlimdims(igrp, &nunlims, NULL)); unlimids = (int *) emalloc((nunlims + 1) * sizeof(int)); NC_CHECK(nc_inq_unlimdims(igrp, NULL, unlimids)); #else NC_CHECK(nc_inq_unlimdim(igrp, &unlimid)); #endif /* USE_NETCDF4 */ /* Copy each dimension to output, including unlimited dimension(s) */ for (dgrp = 0; dgrp < ndims; dgrp++) { char name[NC_MAX_NAME]; size_t length; int i_is_unlim; int o_is_unlim; int idimid, odimid; #ifdef USE_NETCDF4 int uld; #endif i_is_unlim = 0; #ifdef USE_NETCDF4 idimid = dimids[dgrp]; for (uld = 0; uld < nunlims; uld++) { if(idimid == unlimids[uld]) { i_is_unlim = 1; break; } } #else idimid = dgrp; if(unlimid != -1 && (idimid == unlimid)) { i_is_unlim = 1; } #endif /* USE_NETCDF4 */ stat = nc_inq_dim(igrp, idimid, name, &length); if (stat == NC_EDIMSIZE && sizeof(size_t) < 8) { error("dimension \"%s\" requires 64-bit platform", name); } NC_CHECK(stat); o_is_unlim = i_is_unlim; if(i_is_unlim && !option_fix_unlimdims) { NC_CHECK(nc_def_dim(ogrp, name, NC_UNLIMITED, &odimid)); } else { NC_CHECK(nc_def_dim(ogrp, name, length, &odimid)); o_is_unlim = 0; } /* Store (idimid, odimid) mapping for later use, also whether unlimited */ dimmap_store(idimid, odimid, i_is_unlim, o_is_unlim); } #ifdef USE_NETCDF4 free(dimids); free(unlimids); #endif /* USE_NETCDF4 */ return stat; } /* Copy the attributes for variable ivar in group igrp to variable * ovar in group ogrp. Global (group) attributes are specified by * using the varid NC_GLOBAL */ static int copy_atts(int igrp, int ivar, int ogrp, int ovar) { int natts; int iatt; int stat = NC_NOERR; NC_CHECK(nc_inq_varnatts(igrp, ivar, &natts)); for(iatt = 0; iatt < natts; iatt++) { char name[NC_MAX_NAME]; NC_CHECK(nc_inq_attname(igrp, ivar, iatt, name)); NC_CHECK(nc_copy_att(igrp, ivar, name, ogrp, ovar)); } return stat; } /* copy the schema for a single variable in group igrp to group ogrp */ static int copy_var(int igrp, int varid, int ogrp) { int stat = NC_NOERR; int ndims; int *idimids; /* ids of dims for input variable */ int *odimids; /* ids of dims for output variable */ char name[NC_MAX_NAME]; nc_type typeid, o_typeid; int natts; int i; int o_varid; NC_CHECK(nc_inq_varndims(igrp, varid, &ndims)); idimids = (int *) emalloc((ndims + 1) * sizeof(int)); NC_CHECK(nc_inq_var(igrp, varid, name, &typeid, NULL, idimids, &natts)); o_typeid = typeid; #ifdef USE_NETCDF4 if (typeid > NC_STRING) { /* user-defined type */ /* type ids in source don't necessarily correspond to same * typeids in destination, so look up destination typeid by * using type name */ char type_name[NC_MAX_NAME]; NC_CHECK(nc_inq_type(igrp, typeid, type_name, NULL)); NC_CHECK(nc_inq_typeid(ogrp, type_name, &o_typeid)); } #endif /* USE_NETCDF4 */ /* get the corresponding dimids in the output file */ odimids = (int *) emalloc((ndims + 1) * sizeof(int)); for(i = 0; i < ndims; i++) { odimids[i] = dimmap_odimid(idimids[i]); if(odimids[i] == -1) { error("Oops, no dimension in output associated with input dimid %d", idimids[i]); } } /* define the output variable */ NC_CHECK(nc_def_var(ogrp, name, o_typeid, ndims, odimids, &o_varid)); /* attach the variable attributes to the output variable */ NC_CHECK(copy_atts(igrp, varid, ogrp, o_varid)); #ifdef USE_NETCDF4 { int inkind; int outkind; NC_CHECK(nc_inq_format(igrp, &inkind)); NC_CHECK(nc_inq_format(ogrp, &outkind)); if(outkind == NC_FORMAT_NETCDF4 || outkind == NC_FORMAT_NETCDF4_CLASSIC) { if((inkind == NC_FORMAT_NETCDF4 || inkind == NC_FORMAT_NETCDF4_CLASSIC)) { /* Copy all netCDF-4 specific variable properties such as * chunking, endianness, deflation, checksumming, fill, etc. */ NC_CHECK(copy_var_specials(igrp, varid, ogrp, o_varid)); } else { /* Set chunking if specified in command line option */ NC_CHECK(set_var_chunked(ogrp, o_varid)); /* Set compression if specified in command line option */ NC_CHECK(set_var_compressed(ogrp, o_varid)); } NC_CHECK(copy_var_filter(igrp, varid, ogrp, o_varid)); } } #endif /* USE_NETCDF4 */ free(idimids); free(odimids); return stat; } /* copy the schema for all the variables in group igrp to group ogrp */ static int copy_vars(int igrp, int ogrp) { int stat = NC_NOERR; int nvars; int varid; int iv; /* variable number */ idnode_t* vlist = 0; /* list for vars specified with -v option */ /* * If any vars were specified with -v option, get list of * associated variable ids relative to this group. Assume vars * specified with syntax like "grp1/grp2/varname" or * "/grp1/grp2/varname" if they are in groups. */ vlist = newidlist(); /* list for vars specified with -v option */ for (iv=0; iv < option_nlvars; iv++) { if(nc_inq_gvarid(igrp, option_lvars[iv], &varid) == NC_NOERR) idadd(vlist, varid); } NC_CHECK(nc_inq_nvars(igrp, &nvars)); for (varid = 0; varid < nvars; varid++) { if (!option_varstruct && option_nlvars > 0 && ! idmember(vlist, varid)) continue; NC_CHECK(copy_var(igrp, varid, ogrp)); } freeidlist(vlist); return stat; } /* Copy the schema in a group and all its subgroups, recursively, from * group igrp in input to parent group ogrp in destination. Use * dimmap array to map input dimids to output dimids. */ static int copy_schema(int igrp, int ogrp) { int stat = NC_NOERR; int ogid; /* like igrp but in output file */ /* get groupid in output corresponding to group igrp in input, * given parent group (or root group) ogrp in output */ NC_CHECK(get_grpid(igrp, ogrp, &ogid)); NC_CHECK(copy_dims(igrp, ogid)); NC_CHECK(copy_atts(igrp, NC_GLOBAL, ogid, NC_GLOBAL)); NC_CHECK(copy_vars(igrp, ogid)); #ifdef USE_NETCDF4 { int numgrps; int *grpids; int i; /* Copy schema from subgroups */ stat = nc_inq_grps(igrp, &numgrps, NULL); grpids = (int *)emalloc((numgrps + 1) * sizeof(int)); NC_CHECK(nc_inq_grps(igrp, &numgrps, grpids)); for(i = 0; i < numgrps; i++) { if (option_grpstruct || group_wanted(grpids[i], option_nlgrps, option_grpids)) { NC_CHECK(copy_schema(grpids[i], ogid)); } } free(grpids); } #endif /* USE_NETCDF4 */ return stat; } /* Return number of values for a variable varid in a group igrp */ static int inq_nvals(int igrp, int varid, long long *nvalsp) { int stat = NC_NOERR; int ndims; int *dimids; int dim; long long nvals = 1; NC_CHECK(nc_inq_varndims(igrp, varid, &ndims)); dimids = (int *) emalloc((ndims + 1) * sizeof(int)); NC_CHECK(nc_inq_vardimid (igrp, varid, dimids)); for(dim = 0; dim < ndims; dim++) { size_t len; NC_CHECK(nc_inq_dimlen(igrp, dimids[dim], &len)); nvals *= len; } if(nvalsp) *nvalsp = nvals; free(dimids); return stat; } /* Copy data from variable varid in group igrp to corresponding group * ogrp. */ static int copy_var_data(int igrp, int varid, int ogrp) { int stat = NC_NOERR; nc_type vartype; long long nvalues; /* number of values for this variable */ size_t ntoget; /* number of values to access this iteration */ size_t value_size; /* size of a single value of this variable */ static void *buf = 0; /* buffer for the variable values */ char varname[NC_MAX_NAME]; int ovarid; size_t *start; size_t *count; nciter_t *iterp; /* opaque structure for iteration status */ int do_realloc = 0; #ifdef USE_NETCDF4 int okind; size_t chunksize; #endif NC_CHECK(inq_nvals(igrp, varid, &nvalues)); if(nvalues == 0) return stat; /* get corresponding output variable */ NC_CHECK(nc_inq_varname(igrp, varid, varname)); NC_CHECK(nc_inq_varid(ogrp, varname, &ovarid)); NC_CHECK(nc_inq_vartype(igrp, varid, &vartype)); value_size = val_size(igrp, varid); if(value_size > option_copy_buffer_size) { option_copy_buffer_size = value_size; do_realloc = 1; } #ifdef USE_NETCDF4 NC_CHECK(nc_inq_format(ogrp, &okind)); if(okind == NC_FORMAT_NETCDF4 || okind == NC_FORMAT_NETCDF4_CLASSIC) { /* if this variable chunked, set variable chunk cache size */ int contig = 1; NC_CHECK(nc_inq_var_chunking(ogrp, ovarid, &contig, NULL)); if(contig == 0) { /* chunked */ if(option_compute_chunkcaches) { /* Try to estimate variable-specific chunk cache, * depending on specific size and shape of this * variable's chunks. This doesn't work yet. */ size_t chunkcache_size, chunkcache_nelems; float chunkcache_preemption; NC_CHECK(inq_var_chunking_params(igrp, varid, ogrp, ovarid, &chunkcache_size, &chunkcache_nelems, &chunkcache_preemption)); NC_CHECK(nc_set_var_chunk_cache(ogrp, ovarid, chunkcache_size, chunkcache_nelems, chunkcache_preemption)); } else { /* by default, use same chunk cache for all chunked variables */ NC_CHECK(nc_set_var_chunk_cache(ogrp, ovarid, option_chunk_cache_size, option_chunk_cache_nelems, COPY_CHUNKCACHE_PREEMPTION)); } } } /* For chunked variables, option_copy_buffer_size must also be at least as large as * size of a chunk in input, otherwise resize it. */ { NC_CHECK(inq_var_chunksize(igrp, varid, &chunksize)); if(chunksize > option_copy_buffer_size) { option_copy_buffer_size = chunksize; do_realloc = 1; } } #endif /* USE_NETCDF4 */ if(buf && do_realloc) { free(buf); buf = 0; } if(buf == 0) { /* first time or needs to grow */ buf = emalloc(option_copy_buffer_size); memset((void*)buf,0,option_copy_buffer_size); } /* initialize variable iteration */ NC_CHECK(nc_get_iter(igrp, varid, option_copy_buffer_size, &iterp)); start = (size_t *) emalloc((iterp->rank + 1) * sizeof(size_t)); count = (size_t *) emalloc((iterp->rank + 1) * sizeof(size_t)); /* nc_next_iter() initializes start and count on first call, * changes start and count to iterate through whole variable on * subsequent calls. */ while((ntoget = nc_next_iter(iterp, start, count)) > 0) { NC_CHECK(nc_get_vara(igrp, varid, start, count, buf)); NC_CHECK(nc_put_vara(ogrp, ovarid, start, count, buf)); #ifdef USE_NETCDF4 /* we have to explicitly free values for strings and vlens */ if(vartype == NC_STRING) { NC_CHECK(nc_free_string(ntoget, (char **)buf)); } else if(vartype > NC_STRING) { /* user-defined type */ nc_type vclass; NC_CHECK(nc_inq_user_type(igrp, vartype, NULL, NULL, NULL, NULL, &vclass)); if(vclass == NC_VLEN) { NC_CHECK(nc_free_vlens(ntoget, (nc_vlen_t *)buf)); } } #endif /* USE_NETCDF4 */ } /* end main iteration loop */ #ifdef USE_NETCDF4 /* We're all done with this input and output variable, so if * either variable is chunked, free up its variable chunk cache */ /* NC_CHECK(free_var_chunk_cache(igrp, varid)); */ /* NC_CHECK(free_var_chunk_cache(ogrp, ovarid)); */ #endif /* USE_NETCDF4 */ free(start); free(count); NC_CHECK(nc_free_iter(iterp)); return stat; } /* Copy data from variables in group igrp to variables in * corresponding group with parent ogrp, and all subgroups * recursively */ static int copy_data(int igrp, int ogrp) { int stat = NC_NOERR; int ogid; int nvars; int varid; #ifdef USE_NETCDF4 int numgrps; int *grpids; int i; #endif int iv; /* variable number */ idnode_t* vlist = NULL; /* list for vars specified with -v option */ /* * If any vars were specified with -v option, get list of * associated variable ids relative to this group. Assume vars * specified with syntax like "grp1/grp2/varname" or * "/grp1/grp2/varname" if they are in groups. */ vlist = newidlist(); /* list for vars specified with -v option */ for (iv=0; iv < option_nlvars; iv++) { if(nc_inq_gvarid(igrp, option_lvars[iv], &varid) == NC_NOERR) idadd(vlist, varid); } /* get groupid in output corresponding to group igrp in input, * given parent group (or root group) ogrp in output */ NC_CHECK(get_grpid(igrp, ogrp, &ogid)); /* Copy data from this group */ NC_CHECK(nc_inq_nvars(igrp, &nvars)); for (varid = 0; varid < nvars; varid++) { if (option_nlvars > 0 && ! idmember(vlist, varid)) continue; if (!group_wanted(igrp, option_nlgrps, option_grpids)) continue; NC_CHECK(copy_var_data(igrp, varid, ogid)); } #ifdef USE_NETCDF4 /* Copy data from subgroups */ stat = nc_inq_grps(igrp, &numgrps, NULL); grpids = (int *)emalloc((numgrps + 1) * sizeof(int)); NC_CHECK(nc_inq_grps(igrp, &numgrps, grpids)); for(i = 0; i < numgrps; i++) { if (!option_grpstruct && !group_wanted(grpids[i], option_nlgrps, option_grpids)) continue; NC_CHECK(copy_data(grpids[i], ogid)); } free(grpids); #endif /* USE_NETCDF4 */ freeidlist(vlist); return stat; } /* Count total number of dimensions in ncid and all its descendant subgroups */ int count_dims(int ncid) { #ifdef USE_NETCDF4 int numgrps; #endif int ndims; NC_CHECK(nc_inq_ndims(ncid, &ndims)); #ifdef USE_NETCDF4 NC_CHECK(nc_inq_grps(ncid, &numgrps, NULL)); if(numgrps > 0) { int igrp; int *grpids = emalloc(numgrps * sizeof(int)); NC_CHECK(nc_inq_grps(ncid, &numgrps, grpids)); for(igrp = 0; igrp < numgrps; igrp++) { ndims += count_dims(grpids[igrp]); } free(grpids); } #endif /* USE_NETCDF4 */ return ndims; } /* Test if special case: netCDF-3 file with more than one record * variable. Performance can be very slow for this case when the disk * block size is large, there are many record variables, and a * record's worth of data for some variables is smaller than the disk * block size. In this case, copying the record variables a variable * at a time causes much rereading of record data, so instead we want * to copy data a record at a time. */ static int nc3_special_case(int ncid, int kind) { if (kind == NC_FORMAT_CLASSIC || kind == NC_FORMAT_64BIT_OFFSET || kind == NC_FORMAT_CDF5) { int recdimid = 0; NC_CHECK(nc_inq_unlimdim(ncid, &recdimid)); if (recdimid != -1) { /* we have a record dimension */ int nvars; int varid; NC_CHECK(nc_inq_nvars(ncid, &nvars)); for (varid = 0; varid < nvars; varid++) { int *dimids = 0; int ndims; NC_CHECK( nc_inq_varndims(ncid, varid, &ndims) ); if (ndims > 0) { int dimids0; dimids = (int *) emalloc((ndims + 1) * sizeof(int)); NC_CHECK( nc_inq_vardimid(ncid, varid, dimids) ); dimids0 = dimids[0]; free(dimids); if(dimids0 == recdimid) { return 1; /* found a record variable */ } } } } } return 0; } /* Classify variables in ncid as either fixed-size variables (with no * unlimited dimension) or as record variables (with an unlimited * dimension) */ static int classify_vars( int ncid, /* netCDF ID */ size_t *nf, /* for returning number of fixed-size variables */ int **fvars, /* the array of fixed_size variable IDS, caller should free */ size_t *nr, /* for returning number of record variables */ int **rvars) /* the array of record variable IDs, caller should free */ { int varid; int varindex = 0; int nvars; NC_CHECK(nc_inq_nvars(ncid, &nvars)); *nf = 0; *fvars = (int *) emalloc(nvars * sizeof(int)); *nr = 0; *rvars = (int *) emalloc(nvars * sizeof(int)); if(option_nlvars > 0) { for (varindex = 0; varindex < option_nlvars; varindex++) { nc_inq_varid(ncid,option_lvars[varindex],&varid); if (isrecvar(ncid, varid)) { (*rvars)[*nr] = varid; (*nr)++; } else { (*fvars)[*nf] = varid; (*nf)++; } } } else { for (varid = 0; varid < nvars; varid++) { if (isrecvar(ncid, varid)) { (*rvars)[*nr] = varid; (*nr)++; } else { (*fvars)[*nf] = varid; (*nf)++; } } } return NC_NOERR; } /* Only called for classic format or 64-bit offset format files, to speed up special case */ static int copy_fixed_size_data(int igrp, int ogrp, size_t nfixed_vars, int *fixed_varids) { size_t ivar; /* for each fixed-size variable, copy data */ for (ivar = 0; ivar < nfixed_vars; ivar++) { int varid = fixed_varids[ivar]; NC_CHECK(copy_var_data(igrp, varid, ogrp)); } if (fixed_varids) free(fixed_varids); return NC_NOERR; } /* copy a record's worth of data for a variable from input to output */ static int copy_rec_var_data(int ncid, /* input */ int ogrp, /* output */ int irec, /* record number */ int varid, /* input variable id */ int ovarid, /* output variable id */ size_t *start, /* start indices for record data */ size_t *count, /* edge lengths for record data */ void *buf /* buffer large enough to hold data */ ) { NC_CHECK(nc_get_vara(ncid, varid, start, count, buf)); NC_CHECK(nc_put_vara(ogrp, ovarid, start, count, buf)); return NC_NOERR; } /* Only called for classic format or 64-bit offset format files, to speed up special case */ static int copy_record_data(int ncid, int ogrp, size_t nrec_vars, int *rec_varids) { int unlimid; size_t nrecs = 0; /* how many records? */ size_t irec; size_t ivar; void **buf; /* space for reading in data for each variable */ int *rec_ovarids; /* corresponding varids in output */ size_t **start; size_t **count; NC_CHECK(nc_inq_unlimdim(ncid, &unlimid)); NC_CHECK(nc_inq_dimlen(ncid, unlimid, &nrecs)); buf = (void **) emalloc(nrec_vars * sizeof(void *)); rec_ovarids = (int *) emalloc(nrec_vars * sizeof(int)); start = (size_t **) emalloc(nrec_vars * sizeof(size_t*)); count = (size_t **) emalloc(nrec_vars * sizeof(size_t*)); /* get space to hold one record's worth of data for each record variable */ for (ivar = 0; ivar < nrec_vars; ivar++) { int varid; int ndims; int *dimids; size_t value_size; int dimid; int ii; size_t nvals; char varname[NC_MAX_NAME]; varid = rec_varids[ivar]; NC_CHECK(nc_inq_varndims(ncid, varid, &ndims)); dimids = (int *) emalloc((1 + ndims) * sizeof(int)); start[ivar] = (size_t *) emalloc(ndims * sizeof(size_t)); count[ivar] = (size_t *) emalloc(ndims * sizeof(size_t)); NC_CHECK(nc_inq_vardimid (ncid, varid, dimids)); value_size = val_size(ncid, varid); nvals = 1; for(ii = 1; ii < ndims; ii++) { /* for rec size, don't include first record dimension */ size_t dimlen; dimid = dimids[ii]; NC_CHECK(nc_inq_dimlen(ncid, dimid, &dimlen)); nvals *= dimlen; start[ivar][ii] = 0; count[ivar][ii] = dimlen; } start[ivar][0] = 0; count[ivar][0] = 1; /* 1 record */ buf[ivar] = (void *) emalloc(nvals * value_size); NC_CHECK(nc_inq_varname(ncid, varid, varname)); NC_CHECK(nc_inq_varid(ogrp, varname, &rec_ovarids[ivar])); if(dimids) free(dimids); } /* for each record, copy all variable data */ for(irec = 0; irec < nrecs; irec++) { for (ivar = 0; ivar < nrec_vars; ivar++) { int varid, ovarid; varid = rec_varids[ivar]; ovarid = rec_ovarids[ivar]; start[ivar][0] = irec; NC_CHECK(copy_rec_var_data(ncid, ogrp, irec, varid, ovarid, start[ivar], count[ivar], buf[ivar])); } } for (ivar = 0; ivar < nrec_vars; ivar++) { if(start[ivar]) free(start[ivar]); if(count[ivar]) free(count[ivar]); } if(start) free(start); if(count) free(count); for (ivar = 0; ivar < nrec_vars; ivar++) { if(buf[ivar]) { free(buf[ivar]); } } if (rec_varids) free(rec_varids); if(buf) free(buf); if(rec_ovarids) free(rec_ovarids); return NC_NOERR; } /* copy infile to outfile using netCDF API */ static int copy(char* infile, char* outfile) { int stat = NC_NOERR; int igrp, ogrp; int inkind, outkind; int open_mode = NC_NOWRITE; int create_mode = NC_CLOBBER; size_t ndims; if(option_read_diskless) { open_mode |= NC_DISKLESS; } NC_CHECK(nc_open(infile, open_mode, &igrp)); NC_CHECK(nc_inq_format(igrp, &inkind)); /* option_kind specifies which netCDF format for output, one of * * SAME_AS_INPUT, NC_FORMAT_CLASSIC, NC_FORMAT_64BIT, * NC_FORMAT_NETCDF4, NC_FORMAT_NETCDF4_CLASSIC * * However, if compression or shuffling was specified and kind was SAME_AS_INPUT, * option_kind is changed to NC_FORMAT_NETCDF4_CLASSIC, if input format is * NC_FORMAT_CLASSIC or NC_FORMAT_64BIT . */ outkind = option_kind; if (option_kind == SAME_AS_INPUT) { /* default, kind not specified */ outkind = inkind; /* Deduce output kind if netCDF-4 features requested */ if (inkind == NC_FORMAT_CLASSIC || inkind == NC_FORMAT_64BIT_OFFSET || inkind == NC_FORMAT_CDF5) { if (option_deflate_level > 0 || option_shuffle_vars == NC_SHUFFLE || option_chunkspec) { outkind = NC_FORMAT_NETCDF4_CLASSIC; } } } #ifdef USE_NETCDF4 if(option_chunkspec) { /* Now that input is open, can parse option_chunkspec into binary * structure. */ NC_CHECK(chunkspec_parse(igrp, option_chunkspec)); } #endif /* USE_NETCDF4 */ /* Check if any vars in -v don't exist */ if(missing_vars(igrp, option_nlvars, option_lvars)) exit(EXIT_FAILURE); if(option_nlgrps > 0) { if(inkind != NC_FORMAT_NETCDF4) { error("Group list (-g ...) only permitted for netCDF-4 file"); exit(EXIT_FAILURE); } /* Check if any grps in -g don't exist */ if(grp_matches(igrp, option_nlgrps, option_lgrps, option_grpids) == 0) exit(EXIT_FAILURE); } if(option_write_diskless) create_mode |= NC_WRITE | NC_DISKLESS; /* NC_WRITE persists diskless file on close */ switch(outkind) { case NC_FORMAT_CLASSIC: /* nothing to do */ break; case NC_FORMAT_64BIT_OFFSET: create_mode |= NC_64BIT_OFFSET; break; case NC_FORMAT_CDF5: #ifdef USE_CDF5 create_mode |= NC_64BIT_DATA; break; #else error("netCDF library built without CDF5 support, can't create CDF5 files"); break; #endif #ifdef USE_NETCDF4 case NC_FORMAT_NETCDF4: create_mode |= NC_NETCDF4; break; case NC_FORMAT_NETCDF4_CLASSIC: create_mode |= NC_NETCDF4 | NC_CLASSIC_MODEL; break; #else case NC_FORMAT_NETCDF4: case NC_FORMAT_NETCDF4_CLASSIC: error("netCDF library built with --disable-netcdf4, can't create netCDF-4 files"); break; #endif /* USE_NETCDF4 */ default: error("bad value for option specifying desired output format, see usage\n"); break; } NC_CHECK(nc_create(outfile, create_mode, &ogrp)); NC_CHECK(nc_set_fill(ogrp, NC_NOFILL, NULL)); #ifdef USE_NETCDF4 /* Because types in one group may depend on types in a different * group, need to create all groups before defining types */ if(inkind == NC_FORMAT_NETCDF4) { NC_CHECK(copy_groups(igrp, ogrp)); NC_CHECK(copy_types(igrp, ogrp)); } #endif /* USE_NETCDF4 */ ndims = count_dims(igrp); NC_CHECK(dimmap_init(ndims)); NC_CHECK(copy_schema(igrp, ogrp)); NC_CHECK(nc_enddef(ogrp)); /* For performance, special case netCDF-3 input or output file with record * variables, to copy a record-at-a-time instead of a * variable-at-a-time. */ /* TODO: check that these special cases work with -v option */ if(nc3_special_case(igrp, inkind)) { size_t nfixed_vars, nrec_vars; int *fixed_varids; int *rec_varids; NC_CHECK(classify_vars(igrp, &nfixed_vars, &fixed_varids, &nrec_vars, &rec_varids)); NC_CHECK(copy_fixed_size_data(igrp, ogrp, nfixed_vars, fixed_varids)); NC_CHECK(copy_record_data(igrp, ogrp, nrec_vars, rec_varids)); } else if (nc3_special_case(ogrp, outkind)) { size_t nfixed_vars, nrec_vars; int *fixed_varids; int *rec_varids; /* classifies output vars, but returns input varids */ NC_CHECK(classify_vars(ogrp, &nfixed_vars, &fixed_varids, &nrec_vars, &rec_varids)); NC_CHECK(copy_fixed_size_data(igrp, ogrp, nfixed_vars, fixed_varids)); NC_CHECK(copy_record_data(igrp, ogrp, nrec_vars, rec_varids)); } else { NC_CHECK(copy_data(igrp, ogrp)); /* recursive, to handle nested groups */ } NC_CHECK(nc_close(igrp)); NC_CHECK(nc_close(ogrp)); return stat; } /* * For non-negative numeric string with multiplier suffix K, M, G, T, * or P (or lower-case equivalent), return corresponding value * incorporating multiplier 1000, 1000000, 1.0d9, ... 1.0d15, or -1.0 * for error. */ static double double_with_suffix(char *str) { double dval; char *suffix = 0; errno = 0; dval = strtod(str, &suffix); if(dval < 0 || errno != 0) return -1.0; if(*suffix) { switch (*suffix) { case 'k': case 'K': dval *= 1000; break; case 'm': case 'M': dval *= 1000000; break; case 'g': case 'G': dval *= 1000000000; break; case 't': case 'T': dval *= 1.0e12; break; case 'p': case 'P': dval *= 1.0e15; break; default: dval = -1.0; /* error, suffix multiplier must be K, M, G, or T */ } } return dval; } static void usage(void) { #define USAGE "\ [-k kind] specify kind of netCDF format for output file, default same as input\n\ kind strings: 'classic', '64-bit offset', 'cdf5',\n\ 'netCDF-4', 'netCDF-4 classic model'\n\ [-3] netCDF classic output (same as -k 'classic')\n\ [-6] 64-bit-offset output (same as -k '64-bit offset')\n\ [-4] netCDF-4 output (same as -k 'netCDF-4')\n\ [-7] netCDF-4-classic output (same as -k 'netCDF-4 classic model')\n\ [-5] CDF5 output (same as -k 'cdf5)\n\ [-d n] set output deflation compression level, default same as input (0=none 9=max)\n\ [-s] add shuffle option to deflation compression\n\ [-c chunkspec] specify chunking for dimensions, e.g. \"dim1/N1,dim2/N2,...\"\n\ [-u] convert unlimited dimensions to fixed-size dimensions in output copy\n\ [-w] write whole output file from diskless netCDF on close\n\ [-v var1,...] include data for only listed variables, but definitions for all variables\n\ [-V var1,...] include definitions and data for only listed variables\n\ [-g grp1,...] include data for only variables in listed groups, but all definitions\n\ [-G grp1,...] include definitions and data only for variables in listed groups\n\ [-m n] set size in bytes of copy buffer, default is 5000000 bytes\n\ [-h n] set size in bytes of chunk_cache for chunked variables\n\ [-e n] set number of elements that chunk_cache can hold\n\ [-r] read whole input file into diskless file on open (classic or 64-bit offset or cdf5 formats only)\n\ [-F filterspec] specify the compression algorithm to apply to an output variable.\n\ [-Ln] set log level to n (>= 0); ignored if logging isn't enabled.\n\ infile name of netCDF input file\n\ outfile name for netCDF output file\n" /* Don't document this flaky option until it works better */ /* [-x] use experimental computed estimates for variable-specific chunk caches\n\ */ error("%s [-k kind] [-[3|4|6|7]] [-d n] [-s] [-c chunkspec] [-u] [-w] [-[v|V] varlist] [-[g|G] grplist] [-m n] [-h n] [-e n] [-r] infile outfile\n%s\nnetCDF library version %s", progname, USAGE, nc_inq_libvers()); } int main(int argc, char**argv) { int exitcode = EXIT_SUCCESS; char* inputfile = NULL; char* outputfile = NULL; int c; #ifdef USE_NETCDF4 int i; struct FilterSpec filterspec; #endif /* table of formats for legal -k values */ struct Kvalues { char* name; int kind; } legalkinds[] = { /* NetCDF-3 classic format (32-bit offsets) */ {"classic", NC_FORMAT_CLASSIC}, /* canonical format name */ {"nc3", NC_FORMAT_CLASSIC}, /* short format name */ {"1", NC_FORMAT_CLASSIC}, /* deprecated, use "-3" or "-k nc3" instead */ /* NetCDF-3 64-bit offset format */ {"64-bit offset", NC_FORMAT_64BIT_OFFSET}, /* canonical format name */ {"nc6", NC_FORMAT_64BIT_OFFSET}, /* short format name */ {"2", NC_FORMAT_64BIT_OFFSET}, /* deprecated, use "-6" or "-k nc6" instead */ {"64-bit-offset", NC_FORMAT_64BIT_OFFSET}, /* deprecated alias */ /* NetCDF-4 HDF5-based format */ {"netCDF-4", NC_FORMAT_NETCDF4}, /* canonical format name */ {"nc4", NC_FORMAT_NETCDF4}, /* short format name */ {"3", NC_FORMAT_NETCDF4}, /* deprecated, use "-4" or "-k nc4" instead */ {"netCDF4", NC_FORMAT_NETCDF4}, /* deprecated aliases */ {"hdf5", NC_FORMAT_NETCDF4}, {"enhanced", NC_FORMAT_NETCDF4}, /* NetCDF-4 HDF5-based format, restricted to classic data model */ {"netCDF-4 classic model", NC_FORMAT_NETCDF4_CLASSIC}, /* canonical format name */ {"nc7", NC_FORMAT_NETCDF4_CLASSIC}, /* short format name */ {"4", NC_FORMAT_NETCDF4_CLASSIC}, /* deprecated, use "-7" or -k nc7" */ {"netCDF-4-classic", NC_FORMAT_NETCDF4_CLASSIC}, /* deprecated aliases */ {"netCDF-4_classic", NC_FORMAT_NETCDF4_CLASSIC}, {"netCDF4_classic", NC_FORMAT_NETCDF4_CLASSIC}, {"hdf5-nc3", NC_FORMAT_NETCDF4_CLASSIC}, {"enhanced-nc3", NC_FORMAT_NETCDF4_CLASSIC}, /* The 64-bit data (CDF5) kind (5) */ {"5", NC_FORMAT_CDF5}, {"64-bit-data", NC_FORMAT_CDF5}, {"64-bit data", NC_FORMAT_CDF5}, {"nc5", NC_FORMAT_CDF5}, {"cdf5", NC_FORMAT_CDF5}, /* null terminate*/ {NULL,0} }; opterr = 1; progname = argv[0]; if (argc <= 1) { usage(); } while ((c = getopt(argc, argv, "k:3467d:sum:c:h:e:rwxg:G:v:V:F:L:")) != -1) { switch(c) { case 'k': /* for specifying variant of netCDF format to be generated Format names: "classic" or "nc3" "64-bit offset" or "nc6" "netCDF-4" or "nc4" "netCDF-4 classic model" or "nc7" "64-bit-data" | "64-bit data" | "cdf5" | "nc5" Format version numbers (deprecated): 1 (=> classic) 2 (=> 64-bit offset) 3 (=> netCDF-4) 4 (=> netCDF-4 classic model) 5 (=> classic 64 bit data, CDF-5) */ { struct Kvalues* kvalue; char *kind_name = (char *) emalloc(strlen(optarg)+1); (void)strcpy(kind_name, optarg); for(kvalue=legalkinds;kvalue->name;kvalue++) { if(strcmp(kind_name,kvalue->name) == 0) { option_kind = kvalue->kind; break; } } if(kvalue->name == NULL) { error("invalid output format: %s", kind_name); } } break; case '3': /* output format is classic (netCDF-3) */ option_kind = NC_FORMAT_CLASSIC; break; case '5': /* output format is cdf5 */ option_kind = NC_FORMAT_CDF5; break; case '6': /* output format is 64-bit-offset (netCDF-3 version 2) */ option_kind = NC_FORMAT_64BIT_OFFSET; break; case '4': /* output format is netCDF-4 (variant of HDF5) */ option_kind = NC_FORMAT_NETCDF4; break; case '7': /* output format is netCDF-4 (restricted to classic model)*/ option_kind = NC_FORMAT_NETCDF4_CLASSIC; break; case 'd': /* non-default compression level specified */ option_deflate_level = strtol(optarg, NULL, 10); if(option_deflate_level < 0 || option_deflate_level > 9) { error("invalid deflation level: %d", option_deflate_level); } break; case 's': /* shuffling, may improve compression */ option_shuffle_vars = NC_SHUFFLE; break; case 'u': /* convert unlimited dimensions to fixed size */ option_fix_unlimdims = 1; break; case 'm': /* non-default size of data copy buffer */ { double dval = double_with_suffix(optarg); /* "K" for kilobytes. "M" for megabytes, ... */ if(dval < 0) error("Suffix used for '-m' option value must be K, M, G, T, or P"); option_copy_buffer_size = dval; break; } case 'h': /* non-default size of chunk cache */ { double dval = double_with_suffix(optarg); /* "K" for kilobytes. "M" for megabytes, ... */ if(dval < 0) error("Suffix used for '-h' option value must be K, M, G, T, or P"); option_chunk_cache_size = dval; break; } case 'e': /* number of elements chunk cache can hold */ { double dval = double_with_suffix(optarg); /* "K" for kilobytes. "M" for megabytes, ... */ if(dval < 0 ) error("Suffix used for '-e' option value must be K, M, G, T, or P"); option_chunk_cache_nelems = (long)dval; break; } case 'r': option_read_diskless = 1; /* read into memory on open */ break; case 'w': option_write_diskless = 1; /* write to memory, persist on close */ break; case 'x': /* use experimental variable-specific chunk caches */ option_compute_chunkcaches = 1; break; case 'c': /* optional chunking spec for each dimension in list */ /* save chunkspec string for parsing later, once we know input ncid */ option_chunkspec = strdup(optarg); break; case 'g': /* group names */ /* make list of names of groups specified */ make_lgrps (optarg, &option_nlgrps, &option_lgrps, &option_grpids); option_grpstruct = true; break; case 'G': /* group names */ /* make list of names of groups specified */ make_lgrps (optarg, &option_nlgrps, &option_lgrps, &option_grpids); option_grpstruct = false; break; case 'v': /* variable names */ /* make list of names of variables specified */ make_lvars (optarg, &option_nlvars, &option_lvars); option_varstruct = true; break; case 'V': /* variable names */ /* make list of names of variables specified */ make_lvars (optarg, &option_nlvars, &option_lvars); option_varstruct = false; break; case 'L': /* Set logging, if logging support was compiled in. */ #ifdef LOGGING { int level = atoi(optarg); if(level >= 0) nc_set_log_level(level); } #endif break; case 'F': /* optional filter spec for a specified variable */ #ifdef USE_NETCDF4 /* If the arg is "none" then suppress all filters on output unless explicit */ if(strcmp(optarg,"none")==0) { suppressfilters = 1; } else { if(parsefilterspec(optarg,&filterspec) != NC_NOERR) usage(); if(nfilterspecs >= (MAX_FILTER_SPECS-1)) error("too many -F filterspecs\n"); filterspecs[nfilterspecs] = filterspec; nfilterspecs++; // Force output to be netcdf-4 option_kind = NC_FORMAT_NETCDF4; } #else error("-F requires netcdf-4"); #endif break; default: usage(); } } argc -= optind; argv += optind; if (argc != 2) { error("one input file and one output file required"); } inputfile = argv[0]; outputfile = argv[1]; if(strcmp(inputfile, outputfile) == 0) { error("output would overwrite input"); } #ifdef USE_NETCDF4 #ifdef DEBUGFILTER { int j; for(i=0;iparams[j]); } fprintf(stderr,"}\n"); fflush(stderr); } } #endif /*DEBUGFILTER*/ #endif /*USE_NETCDF4*/ if(copy(inputfile, outputfile) != NC_NOERR) exitcode = EXIT_FAILURE; #ifdef USE_NETCDF4 /* Clean up */ for(i=0;ifqn) free(spec->fqn); if(spec->params) free(spec->params); } #endif /*USE_NETCDF4*/ exit(exitcode); }