netcdf-c/libdispatch/dcopy.c
Dennis Heimbigner 59e04ae071 This PR adds EXPERIMENTAL support for accessing data in the
cloud using a variant of the Zarr protocol and storage
format. This enhancement is generically referred to as "NCZarr".

The data model supported by NCZarr is netcdf-4 minus the user-defined
types and the String type. In this sense it is similar to the CDF-5
data model.

More detailed information about enabling and using NCZarr is
described in the document NUG/nczarr.md and in a
[Unidata Developer's blog entry](https://www.unidata.ucar.edu/blogs/developer/en/entry/overview-of-zarr-support-in).

WARNING: this code has had limited testing, so do not use this version
for production work. Also, performance improvements are ongoing.
Note especially the following platform matrix of successful tests:

Platform | Build System | S3 support
------------------------------------
Linux+gcc      | Automake     | yes
Linux+gcc      | CMake        | yes
Visual Studio  | CMake        | no

Additionally, and as a consequence of the addition of NCZarr,
major changes have been made to the Filter API. NOTE: NCZarr
does not yet support filters, but these changes are enablers for
that support in the future.  Note that it is possible
(probable?) that there will be some accidental reversions if the
changes here did not correctly mimic the existing filter testing.

In any case, previously filter ids and parameters were of type
unsigned int. In order to support the more general zarr filter
model, this was all converted to char*.  The old HDF5-specific,
unsigned int operations are still supported but they are
wrappers around the new, char* based nc_filterx_XXX functions.
This entailed at least the following changes:
1. Added the files libdispatch/dfilterx.c and include/ncfilter.h
2. Some filterx utilities have been moved to libdispatch/daux.c
3. A new entry, "filter_actions" was added to the NCDispatch table
   and the version bumped.
4. An overly complex set of structs was created to support funnelling
   all of the filterx operations thru a single dispatch
   "filter_actions" entry.
5. Move common code from libhdf5 to libsrc4 so that it is accessible
   to nczarr.

Changes directly related to Zarr:
1. Modified CMakeLists.txt and configure.ac to support both C and C++
   -- this is in support of S3 support via the aws-sdk libraries.
2. Define a size64_t type to support nczarr.
3. More reworking of libdispatch/dinfermodel.c to
   support zarr and to regularize the structure of the fragments
   section of a URL.

Changes not directly related to Zarr:
1. Make client-side filter registration be conditional, with default off.
2. Hack include/nc4internal.h to make some flags added by Ed be unique:
   e.g. NC_CREAT, NC_INDEF, etc.
3. cleanup include/nchttp.h and libdispatch/dhttp.c.
4. Misc. changes to support compiling under Visual Studio including:
   * Better testing under windows for dirent.h and opendir and closedir.
5. Misc. changes to the oc2 code to support various libcurl CURLOPT flags
   and to centralize error reporting.
6. By default, suppress the vlen tests that have unfixed memory leaks; add option to enable them.
7. Make part of the nc_test/test_byterange.sh test be contingent on remotetest.unidata.ucar.edu being accessible.

Changes Left TO-DO:
1. fix provenance code, it is too HDF5 specific.
2020-06-28 18:02:47 -06:00

714 lines
20 KiB
C

/**
* @file
* Copyright 2018 University Corporation for Atmospheric
* Research/Unidata. See COPYRIGHT file for more info.
*
* This file has the var and att copy functions.
*
* @author Dennis Heimbigner
*/
#include "config.h"
#include "ncdispatch.h"
#include "nc_logging.h"
#ifdef USE_NETCDF4
/**
* @internal Compare two netcdf types for equality. Must have the
* ncids as well, to find user-defined types.
*
* @param ncid1 File ID.
* @param typeid1 Type ID.
* @param ncid2 File ID.
* @param typeid2 Type ID.
* @param equalp Pointer that gets 1 of the types are equal, 0
* otherwise.
*
* @return ::NC_NOERR No error.
* @author Ed Hartnett
*/
static int
NC_compare_nc_types(int ncid1, int typeid1, int ncid2, int typeid2,
int *equalp)
{
int ret = NC_NOERR;
/* If you don't care about the answer, neither do I! */
if(equalp == NULL)
return NC_NOERR;
/* Assume the types are not equal. If we find any inequality, then
exit with NC_NOERR and we're done. */
*equalp = 0;
/* Atomic types are so easy! */
if (typeid1 <= NC_MAX_ATOMIC_TYPE)
{
if (typeid2 != typeid1)
return NC_NOERR;
*equalp = 1;
}
else
{
int i, ret, equal1;
char name1[NC_MAX_NAME];
char name2[NC_MAX_NAME];
size_t size1, size2;
nc_type base1, base2;
size_t nelems1, nelems2;
int class1, class2;
void* value1 = NULL;
void* value2 = NULL;
size_t offset1, offset2;
nc_type ftype1, ftype2;
int ndims1, ndims2;
int dimsizes1[NC_MAX_VAR_DIMS];
int dimsizes2[NC_MAX_VAR_DIMS];
/* Find out about the two types. */
if ((ret = nc_inq_user_type(ncid1, typeid1, name1, &size1,
&base1, &nelems1, &class1)))
return ret;
if ((ret = nc_inq_user_type(ncid2, typeid2, name2, &size2,
&base2, &nelems2, &class2)))
return ret;
/* Check the obvious. */
if(size1 != size2 || class1 != class2 || strcmp(name1,name2))
return NC_NOERR;
/* Check user-defined types in detail. */
switch(class1)
{
case NC_VLEN:
if((ret = NC_compare_nc_types(ncid1, base1, ncid2,
base1, &equal1)))
return ret;
if(!equal1)
return NC_NOERR;
break;
case NC_OPAQUE:
/* Already checked size above. */
break;
case NC_ENUM:
if(base1 != base2 || nelems1 != nelems2) return NC_NOERR;
if (!(value1 = malloc(size1)))
return NC_ENOMEM;
if (!(value2 = malloc(size2))) {
free(value1);
return NC_ENOMEM;
}
for(i = 0; i < nelems1; i++)
{
if ((ret = nc_inq_enum_member(ncid1, typeid1, i, name1,
value1)) ||
(ret = nc_inq_enum_member(ncid2, typeid2, i, name2,
value2)) ||
strcmp(name1, name2) || memcmp(value1, value2, size1))
{
free(value1);
free(value2);
return ret;
}
}
free(value1);
free(value2);
break;
case NC_COMPOUND:
if(nelems1 != nelems2)
return NC_NOERR;
/* Compare each field. Each must be equal! */
for(i = 0; i < nelems1; i++)
{
int j;
if ((ret = nc_inq_compound_field(ncid1, typeid1, i, name1, &offset1,
&ftype1, &ndims1, dimsizes1)))
return ret;
if ((ret = nc_inq_compound_field(ncid2, typeid2, i, name2, &offset2,
&ftype2, &ndims2, dimsizes2)))
return ret;
if(ndims1 != ndims2)
return NC_NOERR;
for(j = 0; j < ndims1;j++)
if(dimsizes1[j] != dimsizes2[j])
return NC_NOERR;
/* Compare user-defined field types. */
if((ret = NC_compare_nc_types(ncid1, ftype1, ncid2, ftype2,
&equal1)))
return ret;
if(!equal1)
return NC_NOERR;
}
break;
default:
return NC_EINVAL;
}
*equalp = 1;
}
return ret;
}
/**
 * @internal Recursively hunt for a netCDF type id. (Code from
 * nc4internal.c); Return matching typeid or 0 if not found.
 *
 * The types defined in group ncid2 are compared against (ncid1, tid1)
 * first; if none matches, each child group of ncid2 is searched in
 * turn.
 *
 * @param ncid1 File ID of the type being looked for.
 * @param tid1 Type ID being looked for.
 * @param ncid2 File/group ID in which to search.
 * @param tid2 Pointer that gets type ID of equal type, or 0 when no
 * equal type is found. May be NULL.
 *
 * @return ::NC_NOERR An equal type was found.
 * @return ::NC_EBADTYPE No equal type was found.
 * @return ::NC_ENOMEM Out of memory.
 * @return Any error reported by the inquiry or comparison functions.
 * @author Ed Hartnett
 */
static int
NC_rec_find_nc_type(int ncid1, nc_type tid1, int ncid2, nc_type *tid2)
{
    int i, ret = NC_NOERR;
    int nids = 0;
    int *ids = NULL;

    if (tid2)
        *tid2 = 0;

    /* Get all types in grp ncid2 */
    if ((ret = nc_inq_typeids(ncid2, &nids, NULL)))
        return ret;
    if (nids)
    {
        if (!(ids = malloc((size_t)nids * sizeof(int))))
            return NC_ENOMEM;
        if ((ret = nc_inq_typeids(ncid2, &nids, ids)))
        {
            free(ids);
            return ret;
        }
        for (i = 0; i < nids; i++)
        {
            int equal = 0;

            if ((ret = NC_compare_nc_types(ncid1, tid1, ncid2, ids[i],
                                           &equal)))
            {
                free(ids);
                return ret;
            }
            if (equal)
            {
                if (tid2)
                    *tid2 = ids[i];
                free(ids);
                return NC_NOERR;
            }
        }
        free(ids);
        ids = NULL;
    }

    /* Recurse into the child groups of ncid2, the group being searched. */
    if ((ret = nc_inq_grps(ncid2, &nids, NULL)))
        return ret;
    if (nids)
    {
        if (!(ids = malloc((size_t)nids * sizeof(int))))
            return NC_ENOMEM;
        if ((ret = nc_inq_grps(ncid2, &nids, ids)))
        {
            free(ids);
            return ret;
        }
        for (i = 0; i < nids; i++)
        {
            ret = NC_rec_find_nc_type(ncid1, tid1, ids[i], tid2);
            if (ret == NC_NOERR) /* found */
            {
                free(ids);
                return NC_NOERR;
            }
            /* NC_EBADTYPE only means "not in that subtree": keep
               looking. Anything else is a real failure. */
            if (ret != NC_EBADTYPE)
            {
                free(ids);
                return ret;
            }
        }
        free(ids);
    }
    return NC_EBADTYPE; /* not found */
}
/**
 * @internal Given a type in one file, find its equal (if any) in
 * another file.
 *
 * Atomic types map onto themselves. For any other type the search is
 * delegated to NC_rec_find_nc_type(), starting at group ncid2.
 *
 * @param ncid1 File ID.
 * @param xtype1 Type ID.
 * @param ncid2 File ID.
 * @param xtype2 Pointer that gets type ID of equal type. May be NULL.
 *
 * @return ::NC_NOERR No error.
 * @return ::NC_EINVAL xtype1 is NC_NAT or less.
 * @return Otherwise, whatever NC_rec_find_nc_type() returns.
 * @author Ed Hartnett
 */
static int
NC_find_equal_type(int ncid1, nc_type xtype1, int ncid2, nc_type *xtype2)
{
    /* Reject NC_NAT and anything below it. */
    if (xtype1 <= NC_NAT)
        return NC_EINVAL;

    /* Non-atomic: search group ncid2 and its children for a type
       equal to xtype1. */
    if (xtype1 > NC_MAX_ATOMIC_TYPE)
        return NC_rec_find_nc_type(ncid1, xtype1, ncid2, xtype2);

    /* Atomic: the same type ID is used as the answer. */
    if (xtype2 != NULL)
        *xtype2 = xtype1;
    return NC_NOERR;
}
#endif /* USE_NETCDF4 */
/**
 * This will copy a variable that is an array of primitive type and
 * its attributes from one file to another. Each dimension of the
 * input variable is looked up by name in the output file, so
 * dimensions with the same names must already be defined there; they
 * are assumed to have the same lengths as in the input file. This
 * function doesn't work for copying netCDF-4 variables of type string
 * or a user-defined type (::NC_EBADTYPE is returned for those).
 *
 * This function works even if the files are different formats,
 * (for example, one netcdf classic, the other netcdf-4).
 *
 * If you're copying into a classic-model file, from a netcdf-4 file,
 * you must be copying a variable of one of the six classic-model
 * types, and similarly for the attributes.
 *
 * For large netCDF-3 files, this can be a very inefficient way to
 * copy data from one file to another, because adding a new variable
 * to the target file may require more space in the header and thus
 * result in moving data for other variables in the target file. This
 * is not a problem for netCDF-4 files, which support efficient
 * addition of variables without moving data for other variables.
 *
 * The data is copied one record at a time, a record being one slice
 * along the first dimension.
 *
 * @param ncid_in File ID to copy from.
 * @param varid_in Variable ID to copy.
 * @param ncid_out File ID to copy to.
 *
 * @return ::NC_NOERR No error.
 * @return ::NC_ENOTNC4 Type does not fit in the classic-format target.
 * @return ::NC_ENOMEM Out of memory.
 * @return ::NC_EBADTYPE Variable type is not one of the atomic numeric
 * or char types.
 * @return Any error reported by the underlying netCDF calls.
 * @author Glenn Davis, Ed Hartnett, Dennis Heimbigner
 */
int
nc_copy_var(int ncid_in, int varid_in, int ncid_out)
{
    char name[NC_MAX_NAME + 1];
    char att_name[NC_MAX_NAME + 1];
    nc_type xtype;
    int ndims, dimids_in[NC_MAX_VAR_DIMS], dimids_out[NC_MAX_VAR_DIMS], natts, real_ndims;
    int varid_out;
    int a, d;
    void *data = NULL;
    size_t *count = NULL, *start = NULL;
    size_t reclen = 1;
    size_t *dimlen = NULL;
    int retval = NC_NOERR;
    size_t type_size;
    int src_format, dest_format;
    char type_name[NC_MAX_NAME + 1];
    char dimname_in[NC_MAX_NAME + 1];
    int i;

    /* Learn about this var. */
    if ((retval = nc_inq_var(ncid_in, varid_in, name, &xtype,
                             &ndims, dimids_in, &natts)))
        return retval;

    /* Find corresponding dimids in the output file, matching by name. */
    for (i = 0; i < ndims; i++)
    {
        if ((retval = nc_inq_dimname(ncid_in, dimids_in[i], dimname_in)))
            return retval;
        if ((retval = nc_inq_dimid(ncid_out, dimname_in, &dimids_out[i])))
            return retval;
    }

    LOG((2, "nc_copy_var: ncid_in 0x%x varid_in %d ncid_out 0x%x",
         ncid_in, varid_in, ncid_out));

    /* Make sure we are not trying to write into a netcdf-3 file
     * anything that won't fit in netcdf-3. */
    if ((retval = nc_inq_format(ncid_in, &src_format)))
        return retval;
    if ((retval = nc_inq_format(ncid_out, &dest_format)))
        return retval;
    if ((dest_format == NC_FORMAT_CLASSIC
         || dest_format == NC_FORMAT_64BIT_DATA
         || dest_format == NC_FORMAT_64BIT_OFFSET) &&
        src_format == NC_FORMAT_NETCDF4 && xtype > NC_DOUBLE)
        return NC_ENOTNC4;

    /* Later on, we will need to know the size of this type. */
    if ((retval = nc_inq_type(ncid_in, xtype, type_name, &type_size)))
        return retval;
    /* Cast: type_size is a size_t but the format uses %d. */
    LOG((3, "type %s has size %d", type_name, (int)type_size));

    /* Switch back to define mode, and create the output var. Already
       being in define mode is not an error. */
    retval = nc_redef(ncid_out);
    if (retval && retval != NC_EINDEFINE)
        BAIL(retval);
    if ((retval = nc_def_var(ncid_out, name, xtype,
                             ndims, dimids_out, &varid_out)))
        BAIL(retval);

    /* Copy the attributes. */
    for (a = 0; a < natts; a++)
    {
        if ((retval = nc_inq_attname(ncid_in, varid_in, a, att_name)))
            BAIL(retval);
        if ((retval = nc_copy_att(ncid_in, varid_in, att_name,
                                  ncid_out, varid_out)))
            BAIL(retval);
    }

    /* End define mode, to write metadata and create file. The return
       values of these two calls are not checked. */
    nc_enddef(ncid_out);
    nc_sync(ncid_out);

    /* Allocate memory for our start and count arrays. If ndims = 0
       this is a scalar, which I will treat as a 1-D array with one
       element. */
    real_ndims = ndims ? ndims : 1;
    if (!(start = malloc((size_t)real_ndims * sizeof(size_t))))
        BAIL(NC_ENOMEM);
    if (!(count = malloc((size_t)real_ndims * sizeof(size_t))))
        BAIL(NC_ENOMEM);

    /* The start array will be all zeros, except the first element,
       which will be the record number. Count will be the dimension
       size, except for the first element, which will be one, because
       we will copy one record at a time. For this we need the var
       shape. */
    if (!(dimlen = malloc((size_t)real_ndims * sizeof(size_t))))
        BAIL(NC_ENOMEM);

    /* Set to 0, to correct for an unlikely dereference
       error reported by clang/llvm. */
    dimlen[0] = 0;

    /* Find out how much data. */
    for (d = 0; d < ndims; d++)
    {
        if ((retval = nc_inq_dimlen(ncid_in, dimids_in[d], &dimlen[d])))
            BAIL(retval);
        LOG((4, "nc_copy_var: there are %d data", (int)dimlen[d]));
    }

    /* If this is really a scalar, then set the dimlen to 1. */
    if (ndims == 0)
        dimlen[0] = 1;

    /* reclen becomes the number of values in one record, i.e. the
       product of all dimension lengths but the first. */
    for (d = 0; d < real_ndims; d++)
    {
        start[d] = 0;
        count[d] = d ? dimlen[d] : 1;
        if (d) reclen *= dimlen[d];
    }

    /* If there are no records, or every record is empty because some
       other dimension has length zero, there is no data to copy. This
       also avoids a zero-byte malloc() below, whose NULL result would
       be mistaken for an out-of-memory condition. */
    if (!dimlen[0] || !reclen)
        goto exit;

    /* Allocate memory for one record. */
    if (!(data = malloc(reclen * type_size)))
        BAIL(NC_ENOMEM);

    /* Copy the var data one record at a time; stop at the first error. */
    for (start[0] = 0; !retval && start[0] < (size_t)dimlen[0]; start[0]++)
    {
        switch (xtype)
        {
        case NC_BYTE:
            retval = nc_get_vara_schar(ncid_in, varid_in, start, count,
                                       (signed char *)data);
            if (!retval)
                retval = nc_put_vara_schar(ncid_out, varid_out, start, count,
                                           (const signed char *)data);
            break;
        case NC_CHAR:
            retval = nc_get_vara_text(ncid_in, varid_in, start, count,
                                      (char *)data);
            if (!retval)
                retval = nc_put_vara_text(ncid_out, varid_out, start, count,
                                          (char *)data);
            break;
        case NC_SHORT:
            retval = nc_get_vara_short(ncid_in, varid_in, start, count,
                                       (short *)data);
            if (!retval)
                retval = nc_put_vara_short(ncid_out, varid_out, start, count,
                                           (short *)data);
            break;
        case NC_INT:
            retval = nc_get_vara_int(ncid_in, varid_in, start, count,
                                     (int *)data);
            if (!retval)
                retval = nc_put_vara_int(ncid_out, varid_out, start, count,
                                         (int *)data);
            break;
        case NC_FLOAT:
            retval = nc_get_vara_float(ncid_in, varid_in, start, count,
                                       (float *)data);
            if (!retval)
                retval = nc_put_vara_float(ncid_out, varid_out, start, count,
                                           (float *)data);
            break;
        case NC_DOUBLE:
            retval = nc_get_vara_double(ncid_in, varid_in, start, count,
                                        (double *)data);
            if (!retval)
                retval = nc_put_vara_double(ncid_out, varid_out, start, count,
                                            (double *)data);
            break;
        case NC_UBYTE:
            retval = nc_get_vara_uchar(ncid_in, varid_in, start, count,
                                       (unsigned char *)data);
            if (!retval)
                retval = nc_put_vara_uchar(ncid_out, varid_out, start, count,
                                           (unsigned char *)data);
            break;
        case NC_USHORT:
            retval = nc_get_vara_ushort(ncid_in, varid_in, start, count,
                                        (unsigned short *)data);
            if (!retval)
                retval = nc_put_vara_ushort(ncid_out, varid_out, start, count,
                                            (unsigned short *)data);
            break;
        case NC_UINT:
            retval = nc_get_vara_uint(ncid_in, varid_in, start, count,
                                      (unsigned int *)data);
            if (!retval)
                retval = nc_put_vara_uint(ncid_out, varid_out, start, count,
                                          (unsigned int *)data);
            break;
        case NC_INT64:
            retval = nc_get_vara_longlong(ncid_in, varid_in, start, count,
                                          (long long *)data);
            if (!retval)
                retval = nc_put_vara_longlong(ncid_out, varid_out, start, count,
                                              (long long *)data);
            break;
        case NC_UINT64:
            retval = nc_get_vara_ulonglong(ncid_in, varid_in, start, count,
                                           (unsigned long long *)data);
            if (!retval)
                retval = nc_put_vara_ulonglong(ncid_out, varid_out, start, count,
                                               (unsigned long long *)data);
            break;
        default:
            retval = NC_EBADTYPE;
            break;
        }
    }

exit:
    /* Single cleanup point; free(NULL) is a no-op. */
    free(data);
    free(dimlen);
    free(start);
    free(count);
    return retval;
}
/**
 * Copy an attribute from one open file to another. This is called by
 * nc_copy_att().
 *
 * Three cases are handled, chosen by the attribute's type:
 * - atomic types below NC_STRING are read into a temporary buffer and
 *   written out with the same type;
 * - NC_STRING attributes are copied through an array of string
 *   pointers (netCDF-4 builds only);
 * - user-defined types require an equal type to exist in the output
 *   file already; VLEN data is released with nc_free_vlen() after the
 *   copy (netCDF-4 builds only).
 *
 * All temporary memory is released on every path, including errors.
 *
 * @param ncid_in File ID to copy from.
 * @param varid_in Variable ID to copy from.
 * @param name Name of attribute to copy.
 * @param ncid_out File ID to copy to.
 * @param varid_out Variable ID to copy to.
 *
 * @return ::NC_NOERR No error.
 * @return ::NC_ENOMEM Out of memory.
 * @return Any error reported by the underlying netCDF calls,
 * including the failure to find an equal user-defined type in the
 * output file.
 * @author Glenn Davis, Ed Hartnett, Dennis Heimbigner
 */
static int
NC_copy_att(int ncid_in, int varid_in, const char *name,
            int ncid_out, int varid_out)
{
    nc_type xtype;
    size_t len;
    void *data = NULL;
    int res;

    LOG((2, "nc_copy_att: ncid_in 0x%x varid_in %d name %s",
         ncid_in, varid_in, name));

    /* Find out about the attribute to be copied. */
    if ((res = nc_inq_att(ncid_in, varid_in, name, &xtype, &len)))
        return res;

    if (xtype < NC_STRING)
    {
        /* Handle non-string atomic types. A zero-length attribute is
           copied with a NULL data pointer. */
        if (len)
        {
            size_t size = NC_atomictypelen(xtype);
            assert(size > 0);
            if (!(data = malloc(len * size)))
                return NC_ENOMEM;
        }

        res = nc_get_att(ncid_in, varid_in, name, data);
        if (!res)
            res = nc_put_att(ncid_out, varid_out, name, xtype,
                             len, data);
        free(data);
    }
#ifdef USE_NETCDF4
    else if (xtype == NC_STRING)
    {
        /* Copy string attributes. */
        char **str_data;

        /* Zero-initialised so that nc_free_string() never sees
           uninitialised pointers if the read below fails (assumes it
           tolerates NULL entries, as free() does). At least one slot is
           requested so that a zero-length attribute is not reported as
           an allocation failure. */
        if (!(str_data = calloc(len ? len : 1, sizeof(char *))))
            return NC_ENOMEM;

        res = nc_get_att_string(ncid_in, varid_in, name, str_data);
        if (!res)
            res = nc_put_att_string(ncid_out, varid_out, name, len,
                                    (const char **)str_data);
        nc_free_string(len, str_data);
        free(str_data);
    }
    else
    {
        /* Copy user-defined type attributes. */
        int type_class;
        size_t size;
        nc_type xtype_out = NC_NAT;

        /* Find out if there is an equal type in the output file. */
        /* Note: original code used a libsrc4 specific internal function
           which we had to "duplicate" here */
        if ((res = NC_find_equal_type(ncid_in, xtype, ncid_out, &xtype_out)))
            return res;
        if (xtype_out)
        {
            /* We found an equal type! */
            if ((res = nc_inq_user_type(ncid_in, xtype, NULL, &size,
                                        NULL, NULL, &type_class)))
                return res;

            if (type_class == NC_VLEN) /* VLENs are different... */
            {
                nc_vlen_t *vldata;
                size_t i;

                /* Zero-initialised so that every entry can be passed to
                   nc_free_vlen() even if the read fails (assumes a zeroed
                   nc_vlen_t is safe to free). */
                if (!(vldata = calloc(len ? len : 1, sizeof(nc_vlen_t))))
                    return NC_ENOMEM;

                res = nc_get_att(ncid_in, varid_in, name, vldata);
                if (!res)
                    res = nc_put_att(ncid_out, varid_out, name, xtype_out,
                                     len, vldata);

                /* Release every entry; report the first error seen. */
                for (i = 0; i < len; i++)
                {
                    int free_res = nc_free_vlen(&vldata[i]);
                    if (!res)
                        res = free_res;
                }
                free(vldata);
            }
            else /* not VLEN */
            {
                /* As for atomic types, a zero-length attribute is
                   copied with a NULL data pointer. */
                if (len && !(data = malloc(size * len)))
                    return NC_ENOMEM;

                res = nc_get_att(ncid_in, varid_in, name, data);
                if (!res)
                    res = nc_put_att(ncid_out, varid_out, name, xtype_out,
                                     len, data);
                free(data);
            }
        }
    }
#endif /* USE_NETCDF4 */
    return res;
}
/**
 * Copy an attribute from one open file to another.
 *
 * Special programming challenge: this function must work even if one
 * or the other of the files is a netcdf version 1.0 file (i.e. not
 * HDF5). So only use top level netcdf api functions.
 *
 * From the netcdf-3 docs: The output netCDF dataset should be in
 * define mode if the attribute to be copied does not already exist
 * for the target variable, or if it would cause an existing target
 * attribute to grow.
 *
 * Copying an attribute onto itself (same file ID and same variable
 * ID) does nothing and succeeds. When the target is a classic-model
 * netCDF-4 file and the attribute already exists there but is not the
 * last one, every attribute of the target variable is rewritten in
 * index order so that attribute order is maintained.
 *
 * @param ncid_in File ID to copy from.
 * @param varid_in Variable ID to copy from.
 * @param name Name of attribute to copy.
 * @param ncid_out File ID to copy to.
 * @param varid_out Variable ID to copy to.
 *
 * @return ::NC_NOERR No error.
 * @return Any error reported by the underlying netCDF calls.
 * @author Glenn Davis, Ed Hartnett, Dennis Heimbigner
 */
int
nc_copy_att(int ncid_in, int varid_in, const char *name,
            int ncid_out, int varid_out)
{
    char existing_name[NC_MAX_NAME + 1];
    int out_format, out_natts, out_attid;
    int idx, status;

    /* What is the destination format? */
    if ((status = nc_inq_format(ncid_out, &out_format)))
        return status;

    /* Can't copy to same var in same file. */
    if (ncid_in == ncid_out && varid_in == varid_out)
        return NC_NOERR;

    /* Only classic-model netCDF-4 targets need the order-preserving
     * treatment below; everything else is a plain copy. */
    if (out_format != NC_FORMAT_NETCDF4_CLASSIC)
        return NC_copy_att(ncid_in, varid_in, name, ncid_out, varid_out);

    /* Does this attribute already exist in the target file? */
    status = nc_inq_attid(ncid_out, varid_out, name, &out_attid);

    /* Attribute does not exist. No order to be preserved. */
    if (status == NC_ENOTATT)
        return NC_copy_att(ncid_in, varid_in, name, ncid_out, varid_out);

    /* Some other error occurred. */
    if (status != NC_NOERR)
        return status;

    /* How many atts for this var? */
    if ((status = nc_inq_varnatts(ncid_out, varid_out, &out_natts)))
        return status;

    /* If this is the last attribute in the target file, we are
     * off the hook. */
    if (out_attid == out_natts - 1)
        return NC_copy_att(ncid_in, varid_in, name, ncid_out, varid_out);

    /* Order MUST BE MAINTAINED! Walk the target's attributes by index:
     * at the index of our attribute copy it from the source, at every
     * other index copy the target's own attribute onto itself. */
    for (idx = 0; idx < out_natts; idx++)
    {
        if (idx == out_attid)
            status = NC_copy_att(ncid_in, varid_in, name,
                                 ncid_out, varid_out);
        else if (!(status = nc_inq_attname(ncid_out, varid_out, idx,
                                           existing_name)))
            status = NC_copy_att(ncid_out, varid_out, existing_name,
                                 ncid_out, varid_out);

        if (status)
            return status;
    }

    return NC_NOERR;
}