Extend nccopy -F option syntax.

A user suggested that the nccopy -F option
syntax should be extended to support specification
of multiple (or all) variables in a single -F option.

The new syntax allows:

1. '*' as the name of the variable; this means apply the
   filter to all variables in the data set.
2. *var1|var2|...* as the variable name to indicate that the filter
   should be applied to the multiple specified variables.
This commit is contained in:
Dennis Heimbigner 2019-02-08 18:48:17 -07:00
parent a260bbbf76
commit a6b04c0c66
12 changed files with 275 additions and 90 deletions

View File

@ -3,7 +3,7 @@
DB=1
#X=-x
ANSI=1
#ANSI=1
#MEM=1
#NOTUIL=1
#FAST=1

View File

@ -142,6 +142,17 @@ The "-F" option can be used repeatedly as long as the variable name
part is different. A different filter id and parameters can be
specified for each occurrence.
It can be convenient to specify that the same compression is to be
applied to more than one variable. To support this, two additional
*-F* cases are defined.
1. ````-F *,...```` means apply the filter to all variables in the dataset.
2. ````-F v1|v2|..,...```` means apply the filter to multiple variables.
Note that the characters '*' and '|' are bash reserved characters,
so you will probably need to escape or quote the filter spec in
that environment.
As a rule, any input filter on an input variable will be applied
to the equivalent output variable -- assuming the output file type
is netcdf-4. It is, however, sometimes convenient to suppress
@ -149,16 +160,19 @@ output compression either totally or on a per-variable basis.
Total suppression of output filters can be accomplished by specifying
a special case of "-F", namely this.
````
nccopy -F "none" input.nc output.nc
nccopy -F none input.nc output.nc
````
Suppression of output filtering for a specific variable can be accomplished
using this format.
The expression ````-F *,none```` is equivalent to ````-F none````.
Suppression of output filtering for a specific set of variables
can be accomplished using these formats.
````
nccopy -F "var,none" input.nc output.nc
nccopy -F "v1|v2|...,none" input.nc output.nc
````
where "var" is the fully qualified name of the variable.
where "var" and each "vi" are the fully qualified names of variables.
The rules for all possible cases of the "-F" flag are defined
The rules for all possible cases of the "-F none" flag are defined
by this table.
<table>
@ -169,6 +183,7 @@ by this table.
<tr><td>false<td>unspecified<td>defined<td>use input filter
<tr><td>false<td>-Fvar,none<td>NA<td>unfiltered
<tr><td>false<td>-Fvar,...<td>NA<td>use output filter
<tr><td>false<td>unspecified<td>none<td>unfiltered
</table>
Parameter Encoding {#ParamEncode}

View File

@ -136,7 +136,7 @@ tst_put_vars_two_unlim_dim.c tst_empty_vlen_unlim.c \
run_empty_vlen_test.sh ref_hdf5_compat1.nc ref_hdf5_compat2.nc \
ref_hdf5_compat3.nc tst_misc.sh tdset.h5 tst_szip.sh ref_szip.h5 \
ref_szip.cdl tst_filter.sh bzip2.cdl filtered.cdl unfiltered.cdl \
ref_bzip2.c findplugin.in perftest.sh
ref_bzip2.c findplugin.in perftest.sh unfilteredvv.cdl filteredvv.cdl
CLEANFILES = tst_mpi_parallel.bin cdm_sea_soundings.nc bm_chunking.nc \
tst_floats_1D.cdl floats_1D_3.nc floats_1D.cdl tst_*.nc \

41
nc_test4/filteredvv.cdl Normal file
View File

@ -0,0 +1,41 @@
netcdf filteredvv {
dimensions:
dim0 = 4 ;
dim1 = 4 ;
variables:
float var1(dim0, dim1) ;
var1:_Storage = "chunked" ;
var1:_ChunkSizes = 2, 2 ;
var1:_Endianness = "little" ;
var1:_Filter = "307,9,4" ;
var1:_NoFill = "true" ;
// global attributes:
:_Format = "netCDF-4" ;
data:
var1 =
100, 101, 102, 103,
104, 105, 106, 107,
108, 109, 1010, 1011,
1012, 1013, 1014, 1015 ;
group: g {
variables:
float var2(dim0, dim1) ;
var2:_Storage = "chunked" ;
var2:_ChunkSizes = 2, 2 ;
var2:_Endianness = "little" ;
var2:_Filter = "307,9,4" ;
var2:_NoFill = "true" ;
// group attributes:
data:
var2 =
0, 1, 2, 3,
4, 5, 6, 7,
8, 9, 10, 11,
12, 13, 14, 15 ;
} // group g
}

View File

@ -160,7 +160,7 @@ verifyparams(void)
static int
openfile(void)
{
unsigned int* params;
unsigned int* params = NULL;
/* Open the file and check it. */
CHECK(nc_open(TESTFILE, NC_NOWRITE, &ncid));
@ -191,6 +191,8 @@ openfile(void)
}
if(nerrs > 0) return NC_EFILTER;
if(params) free(params);
/* Verify chunking */
if(!verifychunks())
return 0;

View File

@ -30,7 +30,12 @@ cat $1 \
# Function to extract _Filter attribute from a file
# These attributes might be platform dependent
getfilterattr() {
sed -e '/var:_Filter/p' -ed <$1 >$2
case "$1" in
var1) sed -e '/var1:_Filter/p' -ed <$1 >$2 ;;
var2) sed -e '/var2:_Filter/p' -ed <$1 >$2 ;;
var) sed -e '/var:_Filter/p' -ed <$1 >$2 ;;
*) sed -e '/var:_Filter/p' -ed <$1 >$2 ;;
esac
}
trimleft() {
@ -98,7 +103,10 @@ fi
if test "x$NCP" = x1 ; then
echo "*** Testing dynamic filters using nccopy"
rm -f ./unfiltered.nc ./filtered.nc ./tmp.nc ./filtered.dump ./tst_filter.txt
# Create our input test files
${NCGEN} -4 -lb -o unfiltered.nc ${srcdir}/unfiltered.cdl
${NCGEN} -4 -lb -o unfilteredvv.nc ${srcdir}/unfilteredvv.cdl
echo " *** Testing simple filter application"
${NCCOPY} -M0 -F "/g/var,307,9,4" unfiltered.nc filtered.nc
${NCDUMP} -s filtered.nc > ./tst_filter.txt
@ -107,6 +115,22 @@ sclean ./tst_filter.txt ./filtered.dump
diff -b -w ${srcdir}/filtered.cdl ./filtered.dump
echo " *** Pass: nccopy simple filter"
echo " *** Testing '*' filter application"
${NCCOPY} -M0 -F "*,307,9,4" unfilteredvv.nc filteredvv.nc
${NCDUMP} -s filteredvv.nc > ./tst_filtervv.txt
# Remove irrelevant -s output
sclean ./tst_filtervv.txt ./filteredvv.dump
diff -b -w ${srcdir}/filteredvv.cdl ./filteredvv.dump
echo " *** Pass: nccopy '*' filter"
echo " *** Testing 'v|v' filter application"
${NCCOPY} -M0 -F "var1|/g/var2,307,9,4" unfilteredvv.nc filteredvbar.nc
${NCDUMP} -n filteredvv -s filteredvbar.nc > ./tst_filtervbar.txt
# Remove irrelevant -s output
sclean ./tst_filtervbar.txt ./filteredvbar.dump
diff -b -w ${srcdir}/filteredvv.cdl ./filteredvbar.dump
echo " *** Pass: nccopy 'v|v' filter"
echo " *** Testing pass-thru of filters"
rm -f ./tst_filter.txt tst_filter2.txt ./tst_filter2.nc
# Prevent failure by allowing any chunk size
@ -170,6 +194,8 @@ rm -f ./bzip*.nc ./unfiltered.nc ./filtered.nc ./tst_filter.txt ./tst_filter2.tx
rm -f ./test_bzip2.c
rm -f ./testmisc.nc
rm -f ./tst_filter2.nc
rm -f ./unfilteredvv.nc ./filteredvv.nc ./filteredvbar.nc
rm -f ./tst_filtervv.txt ./tst_filtervbar.txt
echo "*** Pass: all selected tests passed"
exit 0

29
nc_test4/unfilteredvv.cdl Normal file
View File

@ -0,0 +1,29 @@
netcdf unfilteredvv {
dimensions:
dim0 = 4 ;
dim1 = 4 ;
variables:
float var1(dim0, dim1) ;
var1:_ChunkSizes = 2, 2 ;
data:
var1 =
100, 101, 102, 103,
104, 105, 106, 107,
108, 109, 1010, 1011,
1012, 1013, 1014, 1015 ;
group: g {
variables:
float var2(dim0, dim1) ;
var2:_ChunkSizes = 2, 2 ;
data:
var2 =
0, 1, 2, 3,
4, 5, 6, 7,
8, 9, 10, 11,
12, 13, 14, 15 ;
} // group g
}

View File

@ -36,6 +36,19 @@ List* listnew(void)
return l;
}
/*
 * Release every element stored in the list with free(), then hand the
 * list itself to listfree() and return whatever listfree() returns.
 * A NULL list skips the element pass; listfree() is still called with it.
 */
int
listfreeall(List* l)
{
    int idx;

    if(l == NULL)
        return listfree(l);

    for(idx = 0; idx < listlength(l); idx++) {
        void* item = listget(l,idx);
        free(item); /* free(NULL) is a no-op, so empty slots are harmless */
    }
    return listfree(l);
}
int
listfree(List* l)
{

View File

@ -21,6 +21,7 @@ typedef struct List {
EXTERNC List* listnew(void);
EXTERNC int listfree(List*);
EXTERNC int listfreeall(List*);
EXTERNC int listsetalloc(List*,unsigned long);
EXTERNC int listsetlength(List*,unsigned long);

View File

@ -273,20 +273,25 @@ Set the log level; only usable if nccopy supports netCDF-4 (enhanced).
Set the minimum chunk size; only usable if nccopy supports netCDF-4 (enhanced).
.IP "\fB \-F \fP \fIfilterspec\fP"
For netCDF-4 output, including netCDF-4 classic model, specify a filter
to apply to an specified variable in the output. As a rule, the filter
to apply to a specified set of variables in the output. As a rule, the filter
is a compression/decompression algorithm with a unique numeric identifier
assigned by the HDF Group (see https://support.hdfgroup.org/services/filters.html).
.IP
The \fIfilterspec\fP argument has this general form.
.RS
fqn,filterid,param1,param2...paramn
fqn1|fqn2...,filterid,param1,param2...paramn
or
*,filterid,param1,param2...paramn
.RE
The fqn (fully qualified name) is the name
An fqn (fully qualified name) is the name
of a variable prefixed by its containing
groups with the group names separated by forward slash ('/').
An example might be \fI/g1/g2/var\fP. Alternatively,
just the variable name can be given if it is in the root group:
e.g. \fIvar\fP. Backslash escapes may be used as needed.
A note of warning: the '|' separator is a bash reserved character, so you will
probably need to put the filter spec in some kind of quotes or otherwise escape it.
.IP
The filterid is an unsigned positive integer representing the id
assigned by the HDFgroup to the filter. Following the id is a sequence of
parameters defining the operation of the filter. Each parameter

View File

@ -70,8 +70,7 @@ struct FilterSpec {
unsigned int* params;
};
static int nfilterspecs = 0; /* Number of defined filter specs */
static struct FilterSpec filterspecs[MAX_FILTER_SPECS];
static List* filterspecs = NULL;
static int suppressfilters = 0; /* 1 => do not apply any output filters unless specified */
#endif
@ -245,7 +244,40 @@ done:
}
static int
parsefilterspec(const char* optarg0, struct FilterSpec* spec)
parsevarlist(char* vars, List* vlist)
{
    /*
     * Split the variable part of a -F filter spec into individual names
     * and append a strdup'd copy of each name to vlist.
     *
     * vars  - NUL-terminated, writable string; it is modified in place
     *         (each unescaped '|' is overwritten with NUL).
     * vlist - list receiving the heap-allocated name copies.
     *
     * Accepted forms:
     *   "*"          -> a single "*" entry is pushed
     *   "v1|v2|..."  -> one entry per '|'-separated piece; a backslash
     *                   escapes the following character so an escaped '|'
     *                   is not treated as a separator.
     *
     * Returns NC_NOERR on success, NC_EINVAL if vars is NULL or empty,
     * NC_ENOMEM if a name cannot be duplicated. On error, entries already
     * pushed stay in vlist.
     */
    int stat = NC_NOERR;
    char* q = NULL;
    char* name = NULL;
    int nvars = 0;

    /* Special case 1: empty set of vars */
    if(vars == NULL || strlen(vars)==0) {stat = NC_EINVAL; goto done;}

    /* Special case 2: "*" */
    if(strcmp(vars,"*")==0) {
        if((name = strdup("*")) == NULL) {stat = NC_ENOMEM; goto done;}
        listpush(vlist,name);
        goto done;
    }

    /* Walk, delimiting on '|' separators */
    for(q=vars;*q;q++) {
        if(*q == '\\') {
            /* Skip the escaped character, but never step over the
               terminating NUL when the backslash is the last character;
               otherwise the loop increment would run past the buffer. */
            if(q[1] != '\0') q++;
        } else if(*q == '|') {
            *q = '\0';
            nvars++;
        }
        /* else continue */
    }
    nvars++; /* for last var */

    /* Rewalk to capture the variables; pieces are now NUL-separated */
    for(q=vars;nvars > 0; nvars--) {
        if((name = strdup(q)) == NULL) {stat = NC_ENOMEM; goto done;}
        listpush(vlist,name);
        q += (strlen(q)+1); /* move to next */
    }

done:
    return stat;
}
static int
parsefilterspec(const char* optarg0, List* speclist)
{
int stat = NC_NOERR;
char* optarg = NULL;
@ -254,50 +286,63 @@ parsefilterspec(const char* optarg0, struct FilterSpec* spec)
unsigned int id;
char* p = NULL;
char* remainder = NULL;
List* vlist = NULL;
int i;
int isnone = 0;
if(optarg0 == NULL || strlen(optarg0) == 0 || spec == NULL) return 0;
memset(spec,0,sizeof(struct FilterSpec));
if(optarg0 == NULL || strlen(optarg0) == 0 || speclist == NULL) return 0;
optarg = strdup(optarg0);
/* Collect the fqn, taking escapes into account */
/* Delimit the initial set of variables, taking escapes into account */
p = optarg;
remainder = NULL;
for(;*p;p++) {
if(*p == '\\') p++;
for(;;p++) {
if(*p == '\0') {remainder = p; break;}
else if(*p == ',') {*p = '\0'; remainder = p+1; break;}
else if(*p == '\0') {remainder = p; break;}
else if(*p == '\\') p++;
/* else continue */
}
if(strlen(optarg) == 0) {stat = NC_EINVAL; goto done;} /* fqn does not exist */
/* Make sure leading '/' is in place */
if(optarg[0]=='/')
spec->fqn = strdup(optarg);
else {
spec->fqn = (char*)malloc(1+strlen(optarg)+1);
strcpy(spec->fqn,"/");
strcat(spec->fqn,optarg);
}
/* Parse the variable list */
if((vlist = listnew()) == NULL) {stat = NC_ENOMEM; goto done;}
if((stat=parsevarlist(optarg,vlist))) goto done;
/* Check for special cases */
if( (remainder == NULL) ||
(strncmp(remainder,"none",4) == 0)) {
spec->nofilter = 1;
goto done;
}
/* Collect the id+parameters */
if((stat = NC_parsefilterspec(remainder,&id,&nparams,&params)) == NC_NOERR) {
if(spec != NULL) {
spec->filterid = id;
if(strcasecmp(remainder,"none") != 0) {
/* Collect the id+parameters */
if((stat=NC_parsefilterspec(remainder,&id,&nparams,&params))) goto done;
} else
isnone = 1;
/* Construct a spec entry for each element in vlist */
for(i=0;i<listlength(vlist);i++) {
size_t vlen;
struct FilterSpec* spec = NULL;
const char* var = listget(vlist,i);
if(var == NULL || strlen(var) == 0) continue;
if((spec = calloc(1,sizeof(struct FilterSpec)))==NULL)
{stat = NC_ENOMEM; goto done;}
vlen = strlen(var);
spec->fqn = malloc(vlen+1+1); /* make room for nul and possible prefix '/' */
if(spec->fqn == NULL) {stat = NC_ENOMEM; goto done;}
spec->fqn[0] = '\0'; /* for strlcat */
if(strcmp(var,"*") != 0 && var[0] != '/') strlcat(spec->fqn,"/",vlen+2);
strlcat(spec->fqn,var,vlen+2);
if(isnone)
spec->nofilter = 1;
else {
spec->filterid = id;
spec->nparams = nparams;
spec->params = params;
/* Duplicate the params */
spec->params = malloc(nparams*sizeof(unsigned int));
if(spec->params == NULL) {stat = NC_ENOMEM; goto done;}
memcpy(spec->params,params,nparams*sizeof(unsigned int));
}
listpush(speclist,spec);
spec = NULL;
}
done:
if(params) free(params);
if(vlist) listfreeall(vlist);
if(optarg) free(optarg);
return stat;
}
@ -672,9 +717,10 @@ copy_var_filter(int igrp, int varid, int ogrp, int o_varid, int inkind, int outk
VarID vid = {igrp,varid};
VarID ovid = {ogrp,o_varid};
/* handle filter parameters, copying from input, overriding with command-line options */
struct FilterSpec inspec = {NULL,0,0,0,NULL},
ospec = {NULL,0,0,0,NULL},
actualspec = {NULL,0,0,0,NULL};
struct FilterSpec* ospec = NULL;
struct FilterSpec inspec;
struct FilterSpec nospec;
struct FilterSpec* actualspec = NULL;
int i;
char* ofqn = NULL;
int inputdefined, outputdefined, unfiltered;
@ -689,20 +735,23 @@ copy_var_filter(int igrp, int varid, int ogrp, int o_varid, int inkind, int outk
/* Clear the in and out specs */
memset(&inspec,0,sizeof(inspec));
memset(&ospec,0,sizeof(ospec));
memset(&actualspec,0,sizeof(actualspec));
memset(&nospec,0,sizeof(nospec));
nospec.nofilter = 1;
actualspec = NULL;
ospec = NULL;
/* Is there a filter on the output variable */
outputdefined = 0; /* default is no filter defined */
/* Only bother to look if output is netcdf-4 variant */
if(outnc4) {
/* See if any output filter spec is defined for this output variable */
for(i=0;i<nfilterspecs;i++) {
if(strcmp(filterspecs[i].fqn,ofqn)==0) {
ospec = filterspecs[i];
outputdefined = 1;
break;
}
for(i=0;i<listlength(filterspecs);i++) {
struct FilterSpec* spec = listget(filterspecs,i);
if(strcmp(spec->fqn,"*")==0 || strcmp(spec->fqn,ofqn)==0) {
ospec = spec;
outputdefined = 1;
break;
}
}
}
@ -726,35 +775,38 @@ copy_var_filter(int igrp, int varid, int ogrp, int o_varid, int inkind, int outk
global output input Actual Output
suppress filter filter filter
-----------------------------------------------
true undefined NA unfiltered
true 'none' NA unfiltered
true defined NA use output filter
false undefined defined use input filter
false 'none' NA unfiltered
false defined NA use output filter
true undefined NA unfiltered
true 'none' NA unfiltered
true defined NA use output filter
false undefined defined use input filter
false 'none' NA unfiltered
false defined NA use output filter
false undefined undefined unfiltered
*/
unfiltered = 0;
if(suppressfilters && !outputdefined) /* row 1 */
unfiltered = 1;
else if(suppressfilters && outputdefined && ospec.nofilter) /* row 2 */
else if(suppressfilters && outputdefined && ospec->nofilter) /* row 2 */
unfiltered = 1;
else if(suppressfilters && outputdefined) /* row 3 */
actualspec = ospec;
else if(!suppressfilters && !outputdefined && inputdefined) /* row 4 */
actualspec = inspec;
else if(!suppressfilters && outputdefined && ospec.nofilter) /* row 5 */
actualspec = &inspec;
else if(!suppressfilters && outputdefined && ospec->nofilter) /* row 5 */
unfiltered = 1;
else if(!suppressfilters && outputdefined) /* row 6 */
actualspec = ospec;
else if(!suppressfilters && !outputdefined && !inputdefined) /* row 7 */
actualspec = &nospec;
/* Apply actual filter spec if any */
if(!unfiltered) {
if((stat=nc_def_var_filter(ovid.grpid,ovid.varid,
actualspec.filterid,
actualspec.nparams,
actualspec.params)))
actualspec->filterid,
actualspec->nparams,
actualspec->params)))
goto done;
}
done:
@ -910,6 +962,7 @@ copy_var_specials(int igrp, int varid, int ogrp, int o_varid, int inkind, int ou
int stat = NC_NOERR;
int innc4 = (inkind == NC_FORMAT_NETCDF4 || inkind == NC_FORMAT_NETCDF4_CLASSIC);
int outnc4 = (outkind == NC_FORMAT_NETCDF4 || outkind == NC_FORMAT_NETCDF4_CLASSIC);
int deflated = 0; /* true iff deflation is applied */
if(!outnc4)
return stat; /* Ignore non-netcdf4 files */
@ -949,6 +1002,7 @@ copy_var_specials(int igrp, int varid, int ogrp, int o_varid, int inkind, int ou
then default chunking will be turned on; so do a special check for that. */
if(shuffle_out != 0 || deflate_out != 0)
NC_CHECK(nc_def_var_deflate(ogrp, o_varid, shuffle_out, deflate_out, deflate_level_out));
deflated = deflate_out;
}
}
@ -970,8 +1024,10 @@ copy_var_specials(int igrp, int varid, int ogrp, int o_varid, int inkind, int ou
}
}
/* handle other general filters */
NC_CHECK(copy_var_filter(igrp, varid, ogrp, o_varid, inkind, outkind));
if(!deflated) {
/* handle other general filters */
NC_CHECK(copy_var_filter(igrp, varid, ogrp, o_varid, inkind, outkind));
}
return stat;
}
@ -2001,10 +2057,6 @@ main(int argc, char**argv)
char* inputfile = NULL;
char* outputfile = NULL;
int c;
#ifdef USE_NETCDF4
int i;
struct FilterSpec filterspec;
#endif
chunkspecinit();
option_chunkspecs = listnew();
@ -2145,17 +2197,15 @@ main(int argc, char**argv)
break;
case 'F': /* optional filter spec for a specified variable */
#ifdef USE_NETCDF4
/* If the arg is "none" then suppress all filters
/* If the arg is "none" or "*,none" then suppress all filters
on output unless explicit */
if(strcmp(optarg,"none")==0) {
if(strcmp(optarg,"none")==0
|| strcasecmp(optarg,"*,none")==0) {
suppressfilters = 1;
} else {
if(parsefilterspec(optarg,&filterspec) != NC_NOERR)
usage();
if(nfilterspecs >= (MAX_FILTER_SPECS-1))
error("too many -F filterspecs\n");
filterspecs[nfilterspecs] = filterspec;
nfilterspecs++;
if(filterspecs == NULL)
filterspecs = listnew();
NC_CHECK(parsefilterspec(optarg,filterspecs));
/* Force output to be netcdf-4 */
option_kind = NC_FORMAT_NETCDF4;
}
@ -2195,9 +2245,9 @@ main(int argc, char**argv)
#ifdef USE_NETCDF4
#ifdef DEBUGFILTER
{ int j;
for(i=0;i<nfilterspecs;i++) {
struct FilterSpec *spec = &filterspecs[i];
{ int i,j;
for(i=0;i<listlength(filterspecs);i++) {
struct FilterSpec *spec = listget(filterspecs,i);
fprintf(stderr,"filterspecs[%d]={fqn=|%s| filterid=%u nparams=%ld params=",
i,spec->fqn,spec->filterid,(unsigned long)spec->nparams);
for(j=0;j<spec->nparams;j++) {
@ -2215,11 +2265,13 @@ main(int argc, char**argv)
exitcode = EXIT_FAILURE;
#ifdef USE_NETCDF4
/* Clean up */
for(i=0;i<nfilterspecs;i++) {
struct FilterSpec* spec = &filterspecs[i];
if(spec->fqn) free(spec->fqn);
if(spec->params) free(spec->params);
{ int i;
/* Clean up */
for(i=0;i<listlength(filterspecs);i++) {
struct FilterSpec* spec = listget(filterspecs,i);
if(spec->fqn) free(spec->fqn);
if(spec->params) free(spec->params);
}
}
#endif /*USE_NETCDF4*/

View File

@ -1072,6 +1072,7 @@ pr_att_specials(
}
printf("\" ;\n");
}
if(params) free(params);
}
{
int no_fill = 0;