Merge pull request #1409 from Unidata/nccopydefault.dmh

Nccopy was overriding default chunking when it should not.
This commit is contained in:
Ward Fisher 2019-05-29 15:26:09 -06:00 committed by GitHub
commit d6a3944199
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 118 additions and 38 deletions

View File

@ -618,13 +618,13 @@ nc_inq_ncid().
\param varid Variable ID
\param idp Storage which will get the filter id.
\param idp Storage which will get the filter id; a return value of zero means no filter
\param nparamsp Storage which will get the number of parameters to the
filter
\param params Storage which will get associated parameters. Note
the caller must allocate and free.
\param params Storage which will get associated parameters.
Note: the caller must allocate and free.
\returns ::NC_NOERR No error.
\returns ::NC_ENOTNC4 Not a netCDF-4 file.

View File

@ -1,3 +1,4 @@
/* Copyright 2003-2019, University Corporation for Atmospheric
* Research. See COPYRIGHT file for copying and redistribution
* conditions.*/
@ -14,6 +15,28 @@
#endif
#include <math.h> /* For pow() used below. */
#ifdef LOGGING
/**
 * @internal Log the chunk sizes currently recorded for a variable.
 *
 * Builds a single line of the form
 *   "<title>chunksizes for var <name> sizes=<c0>,<c1>,..."
 * and emits it through LOG at level 1. Output longer than the local
 * buffer is truncated by strlcat rather than overflowing.
 *
 * @param title Prefix identifying the call site (e.g. "find_default: ").
 * @param var Variable whose name (var->hdr.name) and chunksizes array
 *            (var->ndims entries) are reported.
 */
static void
reportchunking(const char* title, NC_VAR_INFO_T* var)
{
    int i;
    char buf[8192];

    buf[0] = '\0'; /* strlcat appends to an existing string, so start empty */
    strlcat(buf,title,sizeof(buf));
    strlcat(buf,"chunksizes for var ",sizeof(buf));
    strlcat(buf,var->hdr.name,sizeof(buf));
    /* Leading space keeps the variable name separate from the label */
    strlcat(buf," sizes=",sizeof(buf));
    /* chunksizes may not have been allocated yet; report nothing then */
    for(i=0;var->chunksizes != NULL && i<var->ndims;i++) {
        char digits[64];
        if(i > 0) strlcat(buf,",",sizeof(buf));
        /* %lu matches the unsigned long the value is cast to */
        snprintf(digits,sizeof(digits),"%lu",(unsigned long)var->chunksizes[i]);
        strlcat(buf,digits,sizeof(buf));
    }
    LOG((1,"%s",buf));
}
#endif
/** @internal Default size for unlimited dim chunksize. */
#define DEFAULT_1D_UNLIM_SIZE (4096)
@ -223,6 +246,9 @@ nc4_find_default_chunksizes2(NC_GRP_INFO_T *grp, NC_VAR_INFO_T *var)
}
}
#ifdef LOGGING
reportchunking("find_default: ",var);
#endif
return NC_NOERR;
}
@ -624,7 +650,7 @@ nc_def_var_extra(int ncid, int varid, int *shuffle, int *deflate,
return NC_ENOTVAR;
assert(var && var->hdr.id == varid);
/* Can't turn on parallel and deflate/fletcher32/szip/shuffle. */
/* Can't turn on parallel and deflate/fletcher32/szip/shuffle (for now). */
if (h5->parallel == NC_TRUE)
if (deflate || fletcher32 || shuffle)
return NC_EINVAL;
@ -726,6 +752,12 @@ nc_def_var_extra(int ncid, int varid, int *shuffle, int *deflate,
return retval;
}
#ifdef LOGGING
{int dfalt=(chunksizes == NULL);
reportchunking(dfalt?"extra: default: ":"extra: user: ",var);
}
#endif
/* Are we setting a fill mode? */
if (no_fill)
{
@ -1008,6 +1040,10 @@ NC4_def_var_filter(int ncid, int varid, unsigned int id, size_t nparams,
if (var->created)
return NC_ELATEDEF;
/* Can't turn on parallel and filter (for now). */
if (h5->parallel == NC_TRUE)
return NC_EINVAL;
#ifdef HAVE_H5Z_SZIP
if(id == H5Z_FILTER_SZIP) {
if(nparams != 2)
@ -1038,6 +1074,17 @@ NC4_def_var_filter(int ncid, int varid, unsigned int id, size_t nparams,
if(var->params == NULL) return NC_ENOMEM;
memcpy(var->params,parms,sizeof(unsigned int)*var->nparams);
}
/* Filter => chunking */
var->contiguous = NC_FALSE;
/* Determine default chunksizes for this variable unless already specified */
if(var->chunksizes && !var->chunksizes[0]) {
if((retval = nc4_find_default_chunksizes2(grp, var)))
return retval;
/* Adjust the cache. */
if ((retval = nc4_adjust_var_cache(grp, var)))
return retval;
}
return NC_NOERR;
}

View File

@ -54,7 +54,7 @@ run_par_bm_test.log: tst_create_files.log
endif # TEST_PARALLEL4
endif # BUILD_UTILITIES
EXTRA_DIST = run_par_bm_test.sh.in run_knmi_bm.sh CMakeLists.txt \
EXTRA_DIST = run_par_bm_test.sh.in run_knmi_bm.sh \
perftest.sh run_bm_test1.sh run_bm_test2.sh \
CMakeLists.txt

View File

@ -484,7 +484,7 @@ init(int argc, char** argv)
totalproduct = 1;
actualproduct = 1;
chunkproduct = 1;
for(i=0;i<MAXDIMS;i++) {
for(i=0;i<NDIMS;i++) {
if(pattern[i] == 1)
chunksize[i] = 1;
totalproduct *= dimsize[i];

View File

@ -469,7 +469,7 @@ init(int argc, char** argv)
totalproduct = 1;
actualproduct = 1;
chunkproduct = 1;
for(i=0;i<MAXDIMS;i++) {
for(i=0;i<NDIMS;i++) {
totalproduct *= dimsize[i];
if(i < ndims) {
actualproduct *= dimsize[i];

View File

@ -347,6 +347,28 @@ done:
return stat;
}
/*
 * Find the output filter spec that applies to the output variable
 * whose fully qualified name is ofqn.
 *
 * The global filterspecs list is scanned in order. A spec whose fqn
 * equals ofqn wins outright and ends the scan. Otherwise the most
 * recently seen "*" spec is used as the fallback.
 *
 * Returns the chosen spec, or NULL when neither a name-specific nor a
 * "*" spec is present.
 */
static struct FilterSpec*
filterspecforvar(const char* ofqn)
{
    struct FilterSpec* wildcard = NULL;
    int idx = 0;

    while(idx < listlength(filterspecs)) {
        struct FilterSpec* candidate = listget(filterspecs,idx);
        /* A name-specific spec overrides any '*' spec */
        if(strcmp(candidate->fqn,ofqn)==0)
            return candidate;
        if(strcmp(candidate->fqn,"*")==0)
            wildcard = candidate; /* remember as fallback */
        idx++;
    }
    /* No exact match: fall back to the wildcard, which may be NULL */
    return wildcard;
}
/* Return size of chunk in bytes for a variable varid in a group igrp, or 0 if
* layout is contiguous */
@ -721,7 +743,6 @@ copy_var_filter(int igrp, int varid, int ogrp, int o_varid, int inkind, int outk
struct FilterSpec inspec;
struct FilterSpec nospec;
struct FilterSpec* actualspec = NULL;
int i;
char* ofqn = NULL;
int inputdefined, outputdefined, unfiltered;
int innc4 = (inkind == NC_FORMAT_NETCDF4 || inkind == NC_FORMAT_NETCDF4_CLASSIC);
@ -745,14 +766,9 @@ copy_var_filter(int igrp, int varid, int ogrp, int o_varid, int inkind, int outk
/* Only bother to look if output is netcdf-4 variant */
if(outnc4) {
/* See if any output filter spec is defined for this output variable */
for(i=0;i<listlength(filterspecs);i++) {
struct FilterSpec* spec = listget(filterspecs,i);
if(strcmp(spec->fqn,"*")==0 || strcmp(spec->fqn,ofqn)==0) {
ospec = spec;
outputdefined = 1;
break;
}
}
ospec = filterspecforvar(ofqn);
if(ospec != NULL)
outputdefined = 1;
}
/* Is there a filter on the input variable */
@ -762,26 +778,26 @@ copy_var_filter(int igrp, int varid, int ogrp, int o_varid, int inkind, int outk
stat=nc_inq_var_filter(vid.grpid,vid.varid,&inspec.filterid,&inspec.nparams,NULL);
if(stat && stat != NC_EFILTER)
goto done; /* true error */
if(stat == NC_NOERR) {/* input has a filter */
if(stat == NC_NOERR && inspec.filterid > 0) {/* input has a filter */
inspec.params = (unsigned int*)malloc(sizeof(unsigned int)*inspec.nparams);
if((stat=nc_inq_var_filter(vid.grpid,vid.varid,&inspec.filterid,&inspec.nparams,inspec.params)))
goto done;
goto done;
inputdefined = 1;
}
}
/* Rules for choosing output filter are as follows:
global output input Actual Output
suppress filter filter filter
-----------------------------------------------
true undefined NA unfiltered
true 'none' NA unfiltered
true defined NA use output filter
false undefined defined use input filter
false 'none' NA unfiltered
false defined NA use output filter
false undefined undefined unfiltered
global output input Actual Output
suppress filter filter filter
-----------------------------------------------------------
1 true undefined NA unfiltered
2 true 'none' NA unfiltered
3 true defined NA use output filter
4 false undefined defined use input filter
5 false 'none' NA unfiltered
6 false defined NA use output filter
7 false undefined undefined unfiltered
*/
unfiltered = 0;
@ -799,7 +815,7 @@ copy_var_filter(int igrp, int varid, int ogrp, int o_varid, int inkind, int outk
else if(!suppressfilters && outputdefined) /* row 6 */
actualspec = ospec;
else if(!suppressfilters && !outputdefined && !inputdefined) /* row 7 */
actualspec = &nospec;
unfiltered = 1;
/* Apply actual filter spec if any */
if(!unfiltered) {
@ -820,12 +836,15 @@ done:
/* Propagate chunking from input to output taking -c flags into account. */
/* Subsumes old set_var_chunked */
/* Must make sure we do not override the default chunking when input is classic */
static int
copy_chunking(int igrp, int i_varid, int ogrp, int o_varid, int ndims, int inkind, int outkind)
{
int stat = NC_NOERR;
int innc4 = (inkind == NC_FORMAT_NETCDF4 || inkind == NC_FORMAT_NETCDF4_CLASSIC);
int outnc4 = (outkind == NC_FORMAT_NETCDF4 || outkind == NC_FORMAT_NETCDF4_CLASSIC);
VarID ovid;
char* ofqn = NULL;
/* First, check the file kinds */
if(!outnc4)
@ -850,7 +869,8 @@ copy_chunking(int igrp, int i_varid, int ogrp, int o_varid, int ndims, int inkin
goto done;
}
{ /* Try dim-specific chunking */
/* See about dim-specific chunking and if any kind of filters are in place */
{
int idim;
/* size of a chunk: product of dimension chunksizes and size of value */
size_t csprod;
@ -862,9 +882,9 @@ copy_chunking(int igrp, int i_varid, int ogrp, int o_varid, int ndims, int inkin
int icontig = 1;
int ocontig = 1; /* until proven otherwise */
/* See if chunking was suppressed */
/* See if dim-specific chunking was suppressed */
if(dimchunkspec_omit())
goto done; /* do nothing */
goto next2;
/* Setup for chunking */
typesize = val_size(ogrp, o_varid);
@ -874,7 +894,12 @@ copy_chunking(int igrp, int i_varid, int ogrp, int o_varid, int ndims, int inkin
memset(&ochunkp,0,sizeof(ochunkp));
/* Get the chunking, if any, on the current input variable */
NC_CHECK(nc_inq_var_chunking(igrp, i_varid, &icontig, ichunkp));
if(innc4) {
NC_CHECK(nc_inq_var_chunking(igrp, i_varid, &icontig, ichunkp));
} else {
icontig = 1;
ichunkp[0] = 0;
}
if(!icontig)
ocontig = 0; /* If input is chunked, then so is output */
@ -915,7 +940,7 @@ copy_chunking(int igrp, int i_varid, int ogrp, int o_varid, int ndims, int inkin
goto next;
}
/* If input is netcdf-4 then use the input size as the chunk size;
/* If input is not netcdf-4 then use the input size as the chunk size;
but do not force chunking.
*/
if(!innc4) {
@ -942,6 +967,14 @@ next:
ocontig = 1; /* Force contiguous */
}
next2:
/* If any kind of output filter was specified, then we have to chunk */
ovid.grpid = ogrp;
ovid.varid = o_varid;
if((stat=computeFQN(ovid,&ofqn))) goto done;
if(option_deflate_level >= 0 || filterspecforvar(ofqn) != NULL)
ocontig = 0;
/* Apply the chunking, if any */
if(ocontig) { /* We can use contiguous output */
@ -949,8 +982,10 @@ next:
} else {
NC_CHECK(nc_def_var_chunking(ogrp, o_varid, NC_CHUNKED, ochunkp));
}
}
} /* else no chunk spec at all, let defaults set at nc_def_var() be used */
done:
if(ofqn) free(ofqn);
return stat;
}

View File

@ -90,7 +90,6 @@ ${NCCOPY} -c ivar:7,1,2,1,5,1,9 tst_nc5.nc tmp_nc5.nc
${NCDUMP} -n tst_nc5 tmp_nc5.nc > tmp_nc5.cdl
# Verify that the core cdl is the same
diff tst_nc5.cdl tmp_nc5.cdl
# Look at the output chunking of ivar
rm -f tmp_nc5a.cdl # reuse
${NCDUMP} -hs -n tst_nc5 tmp_nc5.nc > tmp_nc5.cdl
@ -99,7 +98,6 @@ TESTLINE=`sed -e '/ivar:_ChunkSizes/p' -e d <tmp_nc5.cdl`
# track line to match
BASELINE='ivar:_ChunkSizes = 7, 1, 2, 1, 5, 1, 9 ;'
verifychunkline "$TESTLINE" "$BASELINE"
# Make sure that fvar was not chunked
checkfvar tmp_nc5.cdl
@ -153,7 +151,7 @@ if test "x$T4" = x1 ; then
echo "*** Test nccopy -c with unlimited dimension; classic ->enhanced"
reset
./tst_chunking tst_nc5.nc unlimited
./tst_chunking tst_nc5.nc unlimited # should produce modified tmp_nc5.nc with ivar of rank 2
${NCDUMP} -n tst_nc5 tst_nc5.nc > tst_nc5.cdl
${NCCOPY} -c ivar:5,3 tst_nc5.nc tmp_nc5.nc
${NCDUMP} -n tst_nc5 tmp_nc5.nc > tmp_nc5.cdl