diff --git a/ncdump/nccopy.1 b/ncdump/nccopy.1 index 3de4ab0b6..4b1f2b704 100644 --- a/ncdump/nccopy.1 +++ b/ncdump/nccopy.1 @@ -14,6 +14,8 @@ nccopy \%[-s] \%[-u] \%[-m \fI bufsize \fP] +\%[-h \fI chunk_cache \fP] +\%[-e \fI cache_elems \fP] \%\fI infile \fP \%\fI outfile \fP .hy @@ -113,20 +115,44 @@ for variables that use the `m' and `n' dimensions might be resulting from copying with a chunkspec, use the `-s' option of ncdump on the output file. .IP "\fB -m \fP \fI bufsize \fP" -Specifies the size, in bytes, of the copy buffer used to -to copy large variables, by copying them in smaller pieces, each no -larger than \fI bufsize \fP. A suffix of k, m, or g multiplies -the copy buffer size by one thousand, million, or billion, respectively. -The default is 5000000 bytes, +An integer or floating-point number that specifies the size, in bytes, +of the copy buffer used +to copy large variables. A suffix of K, M, G, or T multiplies +the copy buffer size by one thousand, million, billion, or trillion, respectively. +The default is 5,000,000 bytes, but will be increased if necessary to hold at least one chunk of netCDF-4 chunked variables in the input file. You may want to specify -a value larger than the default for OPeNDAP copies of large files over high +a value larger than the default for copying large files over high latency networks. +.IP "\fB -h \fP \fI chunk_cache \fP" +An integer or floating-point number that specifies the size in bytes +of chunk cache for chunked variables. This is +not a property of the file, but merely a performance tuning parameter +for avoiding compressing or decompressing the same data multiple times +while copying and changing chunk shapes. A suffix of K, M, G, or T multiplies +the chunk cache size by one thousand, million, billion, or trillion, respectively. +The default is 4,194,304 (or whatever was specified for the +configure-time constant CHUNK_CACHE_SIZE when the netCDF library was +built). 
Ideally, the nccopy utility should accept only one memory +buffer size and divide it optimally between a copy buffer and chunk +cache, but no general algorithm for computing the optimum chunk cache +size has been implemented yet. +.IP "\fB -e \fP \fI cache_elems \fP" +Specifies the number of elements that the chunk cache can hold. This is +not a property of the file, but merely a performance tuning parameter +for avoiding compressing or decompressing the same data multiple times +while copying and changing chunk shapes. The default is 1009 (or +whatever was specified for the configure-time constant +CHUNK_CACHE_NELEMS when the netCDF library was built). Ideally, the +nccopy utility should determine an optimum value for this parameter, +but no general algorithm for computing the optimum number of chunk +cache elements has been implemented yet. .P Note that \fB nccopy \fP requires variables that share a dimension to also share the chunk size associated with that dimension, but the API -has no such restriction. With a program you can customize chunking -for each variable independently. +has no such restriction. If you need to customize chunking +for each variable independently, you will need to use the library API +in a custom utility program. 
.SH EXAMPLES .LP Make a copy of foo1.nc, a netCDF file of any type, to foo2.nc, a diff --git a/ncdump/nccopy.c b/ncdump/nccopy.c index fa78effba..de37514f5 100644 --- a/ncdump/nccopy.c +++ b/ncdump/nccopy.c @@ -39,10 +39,12 @@ static int option_kind = SAME_AS_INPUT; static int option_deflate_level = -1; /* default, compress output only if input compressed */ static int option_shuffle_vars = NC_NOSHUFFLE; /* default, no shuffling on compression */ static int option_fix_unlimdims = 0; /* default, preserve unlimited dimensions */ +static char* option_chunkspec = 0; /* default, no chunk specification */ static size_t option_copy_buffer_size = COPY_BUFFER_SIZE; static size_t option_chunk_cache_size = CHUNK_CACHE_SIZE; /* default from config.h */ static size_t option_chunk_cache_nelems = CHUNK_CACHE_NELEMS; /* default from config.h */ -static int option_global_chunk_cache = 1; /* default, use global chunk cache */ +static int option_compute_chunkcaches = 0; /* default, don't try still flaky estimate of + * chunk cache for each variable */ /* get group id in output corresponding to group igrp in input, * given parent group id (or root group id) parid in output. */ @@ -538,8 +540,7 @@ set_var_chunked(int ogrp, int o_varid) /* Determine if this variable should be chunked. A variable * should be chunked if any of its dims are in command-line - * chunk spec and if corresponding chunk size is smaller than - * dimension length. It will also be chunked if any of its + * chunk spec. It will also be chunked if any of its * dims are unlimited. 
*/ for(odim = 0; odim < ndims; odim++) { int odimid = dimids[odim]; @@ -550,7 +551,7 @@ set_var_chunked(int ogrp, int o_varid) size_t chunksize = chunkspec_size(idimid); /* from chunkspec */ size_t dimlen; NC_CHECK(nc_inq_dimlen(ogrp, odimid, &dimlen)); - if( (chunksize > 0 && chunksize < dimlen) || dimmap_ounlim(odimid)) { + if( (chunksize > 0) || dimmap_ounlim(odimid)) { chunked = 1; } varsize *= dimlen; @@ -899,29 +900,26 @@ copy_var_data(int igrp, int varid, int ogrp) { int contig = 1; NC_CHECK(nc_inq_var_chunking(ogrp, ovarid, &contig, NULL)); if(contig == 0) { /* chunked */ - if(option_global_chunk_cache) { /* by default, use same - * global chunk cache for - * all chunked - * variables */ - NC_CHECK(nc_set_var_chunk_cache(ogrp, ovarid, option_chunk_cache_size, - option_chunk_cache_nelems, - COPY_CHUNKCACHE_PREEMPTION)); - } else { /* if experimental "-x" option - * specified, try to estimate - * variable-specific chunk cache, - * depending on specific size and - * shape of this variable's chunks */ + if(option_compute_chunkcaches) { + /* Try to estimate variable-specific chunk cache, + * depending on specific size and shape of this + * variable's chunks. This doesn't work yet. 
*/ size_t chunkcache_size, chunkcache_nelems; float chunkcache_preemption; NC_CHECK(inq_var_chunking_params(igrp, varid, ogrp, ovarid, &chunkcache_size, &chunkcache_nelems, &chunkcache_preemption)); - printf("%s chunkcache_size, chunkcache_nelems: %ld, %ld\n", - varname, chunkcache_size, chunkcache_nelems); /* for debugging */ - NC_CHECK(nc_set_var_chunk_cache(ogrp, ovarid, chunkcache_size, + NC_CHECK(nc_set_var_chunk_cache(ogrp, ovarid, + chunkcache_size, chunkcache_nelems, chunkcache_preemption)); + } else { + /* by default, use same chunk cache for all chunked variables */ + NC_CHECK(nc_set_var_chunk_cache(ogrp, ovarid, + option_chunk_cache_size, + option_chunk_cache_nelems, + COPY_CHUNKCACHE_PREEMPTION)); } } } @@ -971,8 +969,8 @@ copy_var_data(int igrp, int varid, int ogrp) { #ifdef USE_NETCDF4 /* We're all done with this input and output variable, so if * either variable is chunked, free up its variable chunk cache */ - NC_CHECK(free_var_chunk_cache(igrp, varid)); - NC_CHECK(free_var_chunk_cache(ogrp, ovarid)); + /* NC_CHECK(free_var_chunk_cache(igrp, varid)); */ + /* NC_CHECK(free_var_chunk_cache(ogrp, ovarid)); */ #endif /* USE_NETCDF4 */ free(start); free(count); @@ -1046,9 +1044,7 @@ count_dims(ncid) { * type 1 or 2. */ static int -copy(char* infile, char* outfile, - const char* chunkspec_s /* unparsed chunkspec string, from command line */ - ) +copy(char* infile, char* outfile) { int stat = NC_NOERR; int igrp, ogrp; @@ -1066,7 +1062,7 @@ copy(char* infile, char* outfile, if (inkind == NC_FORMAT_CLASSIC || inkind == NC_FORMAT_64BIT) { if (option_deflate_level > 0 || option_shuffle_vars == NC_SHUFFLE || - chunkspec_s) + option_chunkspec) { outkind = NC_FORMAT_NETCDF4_CLASSIC; } @@ -1074,10 +1070,10 @@ copy(char* infile, char* outfile, } #ifdef USE_NETCDF4 - if(chunkspec_s) { - /* Now that input is open, can parse chunkspec_s into binary + if(option_chunkspec) { + /* Now that input is open, can parse option_chunkspec into binary * structure. 
*/ - NC_CHECK(chunkspec_parse(igrp, chunkspec_s)); + NC_CHECK(chunkspec_parse(igrp, option_chunkspec)); } #endif /* USE_NETCDF4 */ @@ -1141,11 +1137,13 @@ usage(void) [-m n] set size in bytes of copy buffer, default is 5000000 bytes\n\ [-h n] set size in bytes of chunk_cache for chunked variables\n\ [-e n] set number of elements that chunk_cache can hold\n\ - [-x] use experimental computed estimates for variable-specific chunk caches\n\ infile name of netCDF input file\n\ outfile name for netCDF output file\n" - error("%s [-k n] [-d n] [-s] [-c chunkspec] [-u] [-m n] [-h n] [-e n] [-x] infile outfile\n%s", + /* Don't document this flaky option until it works better */ + /* [-x] use experimental computed estimates for variable-specific chunk caches\n\ */ + + error("%s [-k n] [-d n] [-s] [-c chunkspec] [-u] [-m n] [-h n] [-e n] infile outfile\n%s", progname, USAGE); } @@ -1155,7 +1153,6 @@ main(int argc, char**argv) char* inputfile = NULL; char* outputfile = NULL; int c; - char* chunkspec = 0; /* table of formats for legal -k values */ struct Kvalues { @@ -1273,16 +1270,16 @@ main(int argc, char**argv) if(*suffix) { switch (*suffix) { case 'k': case 'K': - option_chunk_cache_size *= 1000; + dval *= 1000; break; case 'm': case 'M': - option_chunk_cache_size *= 1000000; + dval *= 1000000; break; case 'g': case 'G': - option_chunk_cache_size *= 1000000000; + dval *= 1000000000; break; case 't': case 'T': - option_chunk_cache_size *= 1.0e12; + dval *= 1.0e12; break; default: error("If suffix used for '-h' option value, it must be K, M, G, or T: %c", @@ -1299,12 +1296,12 @@ main(int argc, char**argv) } break; case 'x': /* use experimental variable-specific chunk caches */ - option_global_chunk_cache = 0; + option_compute_chunkcaches = 1; break; case 'c': /* optional chunking spec for each dimension in list */ { /* save chunkspec string for parsing later, once we know input ncid */ - chunkspec = strdup(optarg); + option_chunkspec = strdup(optarg); break; } default: @@ 
-1324,7 +1321,7 @@ main(int argc, char**argv) error("output would overwrite input"); } - if(copy(inputfile, outputfile, chunkspec) != NC_NOERR) + if(copy(inputfile, outputfile) != NC_NOERR) exit(1); return 0; }