mirror of
https://github.com/Unidata/netcdf-c.git
synced 2025-03-19 17:30:27 +08:00
Added and documented nccopy options to specify chunk cache. Fixed nccopy bug of not permitting chunk lengths >= dimension lengths. Added experimental (undocumented) option for computing adequate chunk cache.
This commit is contained in:
parent
17d09ddac5
commit
f10a142411
@ -14,6 +14,8 @@ nccopy
|
||||
\%[-s]
|
||||
\%[-u]
|
||||
\%[-m \fI bufsize \fP]
|
||||
\%[-h \fI chunk_cache \fP]
|
||||
\%[-e \fI cache_elems \fP]
|
||||
\%\fI infile \fP
|
||||
\%\fI outfile \fP
|
||||
.hy
|
||||
@ -113,20 +115,44 @@ for variables that use the `m' and `n' dimensions might be
|
||||
resulting from copying with a chunkspec, use the `-s'
|
||||
option of ncdump on the output file.
|
||||
.IP "\fB -m \fP \fI bufsize \fP"
|
||||
Specifies the size, in bytes, of the copy buffer used to
|
||||
copy large variables, by copying them in smaller pieces, each no
|
||||
larger than \fI bufsize \fP. A suffix of k, m, or g multiplies
|
||||
the copy buffer size by one thousand, million, or billion, respectively.
|
||||
The default is 5000000 bytes,
|
||||
An integer or floating-point number that specifies the size, in bytes,
|
||||
of the copy buffer used
|
||||
to copy large variables. A suffix of K, M, G, or T multiplies
|
||||
the copy buffer size by one thousand, million, billion, or trillion, respectively.
|
||||
The default is 5,000,000 bytes,
|
||||
but will be increased if necessary to hold at least one chunk of
|
||||
netCDF-4 chunked variables in the input file. You may want to specify
|
||||
a value larger than the default for OPeNDAP copies of large files over high
|
||||
a value larger than the default for copying large files over high
|
||||
latency networks.
|
||||
.IP "\fB -h \fP \fI chunk_cache \fP"
|
||||
An integer or floating-point number that specifies the size in bytes
|
||||
of the chunk cache for chunked variables. This is
|
||||
not a property of the file, but merely a performance tuning parameter
|
||||
for avoiding compressing or decompressing the same data multiple times
|
||||
while copying and changing chunk shapes. A suffix of K, M, G, or T multiplies
|
||||
the chunk cache size by one thousand, million, billion, or trillion, respectively.
|
||||
The default is 4,194,304 (or whatever was specified for the
|
||||
configure-time constant CHUNK_CACHE_SIZE when the netCDF library was
|
||||
built). Ideally, the nccopy utility should accept only one memory
|
||||
buffer size and divide it optimally between a copy buffer and chunk
|
||||
cache, but no general algorithm for computing the optimum chunk cache
|
||||
size has been implemented yet.
|
||||
.IP "\fB -e \fP \fI cache_elems \fP"
|
||||
Specifies the number of elements that the chunk cache can hold. This is
|
||||
not a property of the file, but merely a performance tuning parameter
|
||||
for avoiding compressing or decompressing the same data multiple times
|
||||
while copying and changing chunk shapes. The default is 1009 (or
|
||||
whatever was specified for the configure-time constant
|
||||
CHUNK_CACHE_NELEMS when the netCDF library was built). Ideally, the
|
||||
nccopy utility should determine an optimum value for this parameter,
|
||||
but no general algorithm for computing the optimum number of chunk
|
||||
cache elements has been implemented yet.
|
||||
.P
|
||||
Note that \fB nccopy \fP requires variables that share a dimension to
|
||||
also share the chunk size associated with that dimension, but the API
|
||||
has no such restriction. With a program you can customize chunking
|
||||
for each variable independently.
|
||||
has no such restriction. If you need to customize chunking
|
||||
for each variable independently, you will need to use the library API
|
||||
in a custom utility program.
|
||||
.SH EXAMPLES
|
||||
.LP
|
||||
Make a copy of foo1.nc, a netCDF file of any type, to foo2.nc, a
|
||||
|
@ -39,10 +39,12 @@ static int option_kind = SAME_AS_INPUT;
|
||||
static int option_deflate_level = -1; /* default, compress output only if input compressed */
|
||||
static int option_shuffle_vars = NC_NOSHUFFLE; /* default, no shuffling on compression */
|
||||
static int option_fix_unlimdims = 0; /* default, preserve unlimited dimensions */
|
||||
static char* option_chunkspec = 0; /* default, no chunk specification */
|
||||
static size_t option_copy_buffer_size = COPY_BUFFER_SIZE;
|
||||
static size_t option_chunk_cache_size = CHUNK_CACHE_SIZE; /* default from config.h */
|
||||
static size_t option_chunk_cache_nelems = CHUNK_CACHE_NELEMS; /* default from config.h */
|
||||
static int option_global_chunk_cache = 1; /* default, use global chunk cache */
|
||||
static int option_compute_chunkcaches = 0; /* default, don't try still flaky estimate of
|
||||
* chunk cache for each variable */
|
||||
|
||||
/* get group id in output corresponding to group igrp in input,
|
||||
* given parent group id (or root group id) parid in output. */
|
||||
@ -538,8 +540,7 @@ set_var_chunked(int ogrp, int o_varid)
|
||||
|
||||
/* Determine if this variable should be chunked. A variable
|
||||
* should be chunked if any of its dims are in command-line
|
||||
* chunk spec and if corresponding chunk size is smaller than
|
||||
* dimension length. It will also be chunked if any of its
|
||||
* chunk spec. It will also be chunked if any of its
|
||||
* dims are unlimited. */
|
||||
for(odim = 0; odim < ndims; odim++) {
|
||||
int odimid = dimids[odim];
|
||||
@ -550,7 +551,7 @@ set_var_chunked(int ogrp, int o_varid)
|
||||
size_t chunksize = chunkspec_size(idimid); /* from chunkspec */
|
||||
size_t dimlen;
|
||||
NC_CHECK(nc_inq_dimlen(ogrp, odimid, &dimlen));
|
||||
if( (chunksize > 0 && chunksize < dimlen) || dimmap_ounlim(odimid)) {
|
||||
if( (chunksize > 0) || dimmap_ounlim(odimid)) {
|
||||
chunked = 1;
|
||||
}
|
||||
varsize *= dimlen;
|
||||
@ -899,29 +900,26 @@ copy_var_data(int igrp, int varid, int ogrp) {
|
||||
int contig = 1;
|
||||
NC_CHECK(nc_inq_var_chunking(ogrp, ovarid, &contig, NULL));
|
||||
if(contig == 0) { /* chunked */
|
||||
if(option_global_chunk_cache) { /* by default, use same
|
||||
* global chunk cache for
|
||||
* all chunked
|
||||
* variables */
|
||||
NC_CHECK(nc_set_var_chunk_cache(ogrp, ovarid, option_chunk_cache_size,
|
||||
option_chunk_cache_nelems,
|
||||
COPY_CHUNKCACHE_PREEMPTION));
|
||||
} else { /* if experimental "-x" option
|
||||
* specified, try to estimate
|
||||
* variable-specific chunk cache,
|
||||
* depending on specific size and
|
||||
* shape of this variable's chunks */
|
||||
if(option_compute_chunkcaches) {
|
||||
/* Try to estimate variable-specific chunk cache,
|
||||
* depending on specific size and shape of this
|
||||
* variable's chunks. This doesn't work yet. */
|
||||
size_t chunkcache_size, chunkcache_nelems;
|
||||
float chunkcache_preemption;
|
||||
NC_CHECK(inq_var_chunking_params(igrp, varid, ogrp, ovarid,
|
||||
&chunkcache_size,
|
||||
&chunkcache_nelems,
|
||||
&chunkcache_preemption));
|
||||
printf("%s chunkcache_size, chunkcache_nelems: %ld, %ld\n",
|
||||
varname, chunkcache_size, chunkcache_nelems); /* for debugging */
|
||||
NC_CHECK(nc_set_var_chunk_cache(ogrp, ovarid, chunkcache_size,
|
||||
NC_CHECK(nc_set_var_chunk_cache(ogrp, ovarid,
|
||||
chunkcache_size,
|
||||
chunkcache_nelems,
|
||||
chunkcache_preemption));
|
||||
} else {
|
||||
/* by default, use same chunk cache for all chunked variables */
|
||||
NC_CHECK(nc_set_var_chunk_cache(ogrp, ovarid,
|
||||
option_chunk_cache_size,
|
||||
option_chunk_cache_nelems,
|
||||
COPY_CHUNKCACHE_PREEMPTION));
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -971,8 +969,8 @@ copy_var_data(int igrp, int varid, int ogrp) {
|
||||
#ifdef USE_NETCDF4
|
||||
/* We're all done with this input and output variable, so if
|
||||
* either variable is chunked, free up its variable chunk cache */
|
||||
NC_CHECK(free_var_chunk_cache(igrp, varid));
|
||||
NC_CHECK(free_var_chunk_cache(ogrp, ovarid));
|
||||
/* NC_CHECK(free_var_chunk_cache(igrp, varid)); */
|
||||
/* NC_CHECK(free_var_chunk_cache(ogrp, ovarid)); */
|
||||
#endif /* USE_NETCDF4 */
|
||||
free(start);
|
||||
free(count);
|
||||
@ -1046,9 +1044,7 @@ count_dims(ncid) {
|
||||
* type 1 or 2.
|
||||
*/
|
||||
static int
|
||||
copy(char* infile, char* outfile,
|
||||
const char* chunkspec_s /* unparsed chunkspec string, from command line */
|
||||
)
|
||||
copy(char* infile, char* outfile)
|
||||
{
|
||||
int stat = NC_NOERR;
|
||||
int igrp, ogrp;
|
||||
@ -1066,7 +1062,7 @@ copy(char* infile, char* outfile,
|
||||
if (inkind == NC_FORMAT_CLASSIC || inkind == NC_FORMAT_64BIT) {
|
||||
if (option_deflate_level > 0 ||
|
||||
option_shuffle_vars == NC_SHUFFLE ||
|
||||
chunkspec_s)
|
||||
option_chunkspec)
|
||||
{
|
||||
outkind = NC_FORMAT_NETCDF4_CLASSIC;
|
||||
}
|
||||
@ -1074,10 +1070,10 @@ copy(char* infile, char* outfile,
|
||||
}
|
||||
|
||||
#ifdef USE_NETCDF4
|
||||
if(chunkspec_s) {
|
||||
/* Now that input is open, can parse chunkspec_s into binary
|
||||
if(option_chunkspec) {
|
||||
/* Now that input is open, can parse option_chunkspec into binary
|
||||
* structure. */
|
||||
NC_CHECK(chunkspec_parse(igrp, chunkspec_s));
|
||||
NC_CHECK(chunkspec_parse(igrp, option_chunkspec));
|
||||
}
|
||||
#endif /* USE_NETCDF4 */
|
||||
|
||||
@ -1141,11 +1137,13 @@ usage(void)
|
||||
[-m n] set size in bytes of copy buffer, default is 5000000 bytes\n\
|
||||
[-h n] set size in bytes of chunk_cache for chunked variables\n\
|
||||
[-e n] set number of elements that chunk_cache can hold\n\
|
||||
[-x] use experimental computed estimates for variable-specific chunk caches\n\
|
||||
infile name of netCDF input file\n\
|
||||
outfile name for netCDF output file\n"
|
||||
|
||||
error("%s [-k n] [-d n] [-s] [-c chunkspec] [-u] [-m n] [-h n] [-e n] [-x] infile outfile\n%s",
|
||||
/* Don't document this flaky option until it works better */
|
||||
/* [-x] use experimental computed estimates for variable-specific chunk caches\n\ */
|
||||
|
||||
error("%s [-k n] [-d n] [-s] [-c chunkspec] [-u] [-m n] [-h n] [-e n] infile outfile\n%s",
|
||||
progname, USAGE);
|
||||
}
|
||||
|
||||
@ -1155,7 +1153,6 @@ main(int argc, char**argv)
|
||||
char* inputfile = NULL;
|
||||
char* outputfile = NULL;
|
||||
int c;
|
||||
char* chunkspec = 0;
|
||||
|
||||
/* table of formats for legal -k values */
|
||||
struct Kvalues {
|
||||
@ -1273,16 +1270,16 @@ main(int argc, char**argv)
|
||||
if(*suffix) {
|
||||
switch (*suffix) {
|
||||
case 'k': case 'K':
|
||||
option_chunk_cache_size *= 1000;
|
||||
dval *= 1000;
|
||||
break;
|
||||
case 'm': case 'M':
|
||||
option_chunk_cache_size *= 1000000;
|
||||
dval *= 1000000;
|
||||
break;
|
||||
case 'g': case 'G':
|
||||
option_chunk_cache_size *= 1000000000;
|
||||
dval *= 1000000000;
|
||||
break;
|
||||
case 't': case 'T':
|
||||
option_chunk_cache_size *= 1.0e12;
|
||||
dval *= 1.0e12;
|
||||
break;
|
||||
default:
|
||||
error("If suffix used for '-h' option value, it must be K, M, G, or T: %c",
|
||||
@ -1299,12 +1296,12 @@ main(int argc, char**argv)
|
||||
}
|
||||
break;
|
||||
case 'x': /* use experimental variable-specific chunk caches */
|
||||
option_global_chunk_cache = 0;
|
||||
option_compute_chunkcaches = 1;
|
||||
break;
|
||||
case 'c': /* optional chunking spec for each dimension in list */
|
||||
{
|
||||
/* save chunkspec string for parsing later, once we know input ncid */
|
||||
chunkspec = strdup(optarg);
|
||||
option_chunkspec = strdup(optarg);
|
||||
break;
|
||||
}
|
||||
default:
|
||||
@ -1324,7 +1321,7 @@ main(int argc, char**argv)
|
||||
error("output would overwrite input");
|
||||
}
|
||||
|
||||
if(copy(inputfile, outputfile, chunkspec) != NC_NOERR)
|
||||
if(copy(inputfile, outputfile) != NC_NOERR)
|
||||
exit(1);
|
||||
return 0;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user