netcdf-c/libdap2/daputil.c

/*********************************************************************
* Copyright 2018, UCAR/Unidata
* See netcdf/COPYRIGHT file for copying and redistribution conditions.
*********************************************************************/
#include "config.h"
#ifdef HAVE_SYS_TIME_H
#include <sys/time.h>
#endif
#include "oc.h"
extern int oc_dumpnode(OClink, OCddsnode);
#include "dapincludes.h"
#include "ncoffsets.h"
#define LBRACKET '['
#define RBRACKET ']'
static char* repairname(const char* name, const char* badchars);
static int nccpadding(unsigned long offset, int alignment);
/**************************************************/
/*
Given a legal dap name with arbitrary characters,
convert it to an equivalent legal cdf name.
Currently, the only change is to convert '/'
characters to %2f (after dropping a leading '/').
*/
char*
cdflegalname(char* name)
{
if(name != NULL && name[0] == '/')
name = name+1; /* remove leading / so name will be legal */
return repairname(name,"/");
}
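/* Illustrative usage sketch (added for clarity, not part of the original
 * source): with the '/' -> "%2f" repair performed by repairname() below,
 *     char* nm = cdflegalname("/group/var");   =>  "group%2fvar"
 * The returned string is malloc'd; the caller should free it.
 */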
/* Define the type conversion of the DAP variables
to the external netCDF variable type.
The proper way would be, for example, to convert unsigned short
to int in order to preserve the values.
Unfortunately, libnc-dap does not do this:
it translates the types directly. For example
libnc-dap upgrades the DAP byte type, which is unsigned char,
to NC_BYTE, which is signed char.
Oh well. So we do the same.
*/
nc_type
nctypeconvert(NCDAPCOMMON* drno, nc_type nctype)
{
nc_type upgrade = NC_NAT;
/* The invariant (mimicking libnc-dap) is to maintain the type size */
switch (nctype) {
case NC_CHAR: upgrade = NC_CHAR; break;
case NC_BYTE: upgrade = NC_BYTE; break;
case NC_UBYTE: upgrade = NC_BYTE; break;
case NC_SHORT: upgrade = NC_SHORT; break;
case NC_USHORT: upgrade = NC_SHORT; break;
case NC_INT: upgrade = NC_INT; break;
case NC_UINT: upgrade = NC_INT; break;
case NC_FLOAT: upgrade = NC_FLOAT; break;
case NC_DOUBLE: upgrade = NC_DOUBLE; break;
case NC_URL:
case NC_STRING: upgrade = NC_CHAR; break;
default: break;
}
return upgrade;
}
nc_type
octypetonc(OCtype etype)
{
switch (etype) {
case OC_Char: return NC_CHAR;
case OC_Byte: return NC_UBYTE;
case OC_UByte: return NC_UBYTE;
case OC_Int16: return NC_SHORT;
case OC_UInt16: return NC_USHORT;
case OC_Int32: return NC_INT;
case OC_UInt32: return NC_UINT;
case OC_Int64: return NC_INT64;
case OC_UInt64: return NC_UINT64;
case OC_Float32: return NC_FLOAT;
case OC_Float64: return NC_DOUBLE;
case OC_String: return NC_STRING;
case OC_URL: return NC_STRING;
case OC_Dataset: return NC_Dataset;
case OC_Sequence: return NC_Sequence;
case OC_Structure: return NC_Structure;
case OC_Grid: return NC_Grid;
case OC_Dimension: return NC_Dimension;
case OC_Atomic: return NC_Atomic;
default: break;
}
return NC_NAT;
}
OCtype
nctypetodap(nc_type nctype)
{
switch (nctype) {
case NC_CHAR: return OC_Char;
case NC_BYTE: return OC_Byte;
case NC_UBYTE: return OC_UByte;
case NC_SHORT: return OC_Int16;
case NC_USHORT: return OC_UInt16;
case NC_INT: return OC_Int32;
case NC_UINT: return OC_UInt32;
case NC_INT64: return OC_Int64;
case NC_UINT64: return OC_UInt64;
case NC_FLOAT: return OC_Float32;
case NC_DOUBLE: return OC_Float64;
case NC_STRING: return OC_String;
default : break;
}
return OC_NAT;
}
size_t
nctypesizeof(nc_type nctype)
{
switch (nctype) {
case NC_CHAR: return sizeof(char);
case NC_BYTE: return sizeof(signed char);
case NC_UBYTE: return sizeof(unsigned char);
case NC_SHORT: return sizeof(short);
case NC_USHORT: return sizeof(unsigned short);
case NC_INT: return sizeof(int);
case NC_UINT: return sizeof(unsigned int);
case NC_INT64: return sizeof(long long);
case NC_UINT64: return sizeof(unsigned long long);
case NC_FLOAT: return sizeof(float);
case NC_DOUBLE: return sizeof(double);
case NC_STRING: return sizeof(char*);
default: PANIC("nctypesizeof");
}
return 0;
}
char*
nctypetostring(nc_type nctype)
{
switch (nctype) {
case NC_NAT: return "NC_NAT";
case NC_BYTE: return "NC_BYTE";
case NC_CHAR: return "NC_CHAR";
case NC_SHORT: return "NC_SHORT";
case NC_INT: return "NC_INT";
case NC_FLOAT: return "NC_FLOAT";
case NC_DOUBLE: return "NC_DOUBLE";
case NC_UBYTE: return "NC_UBYTE";
case NC_USHORT: return "NC_USHORT";
case NC_UINT: return "NC_UINT";
case NC_INT64: return "NC_INT64";
case NC_UINT64: return "NC_UINT64";
case NC_STRING: return "NC_STRING";
case NC_VLEN: return "NC_VLEN";
case NC_OPAQUE: return "NC_OPAQUE";
case NC_ENUM: return "NC_ENUM";
case NC_COMPOUND: return "NC_COMPOUND";
case NC_URL: return "NC_URL";
case NC_SET: return "NC_SET";
case NC_Dataset: return "NC_Dataset";
case NC_Sequence: return "NC_Sequence";
case NC_Structure: return "NC_Structure";
case NC_Grid: return "NC_Grid";
case NC_Dimension: return "NC_Dimension";
case NC_Atomic: return "NC_Atomic";
default: break;
}
return NULL;
}
/* Pad a buffer */
int
dapalignbuffer(NCbytes* buf, int alignment)
{
int pad;
unsigned long len;
if(buf == NULL) return 0;
len = ncbyteslength(buf);
pad = nccpadding(len,alignment);
#ifdef TEST
for(;pad > 0;pad--)
ncbytesappend(buf,0x3a); /* 0x3a was chosen at random */
#else
ncbytessetlength(buf,len+pad);
#endif
return 1;
}
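/* Illustrative sketch (assumed values, derived from nccpadding() below):
 * with alignment 4 and a buffer currently 6 bytes long, the padding is
 * nccpadding(6,4) == 2, so dapalignbuffer() extends the buffer length to 8.
 */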
size_t
dapdimproduct(NClist* dimensions)
{
size_t size = 1;
unsigned int i;
if(dimensions == NULL) return size;
for(i=0;i<nclistlength(dimensions);i++) {
CDFnode* dim = (CDFnode*)nclistget(dimensions,i);
size *= dim->dim.declsize;
}
return size;
}
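/* Illustrative sketch (assumed values): for dimensions with declared sizes
 * 3, 4 and 5, dapdimproduct() returns 3*4*5 == 60; a NULL or empty list
 * yields 1.
 */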
/* Return value of param or NULL if not found */
const char*
dapparamvalue(NCDAPCOMMON* nccomm, const char* key)
{
const char* value;
if(nccomm == NULL || key == NULL) return NULL;
value=ncurifragmentlookup(nccomm->oc.url,key);
return value;
}
static const char* checkseps = "+,:;";
/* Search for subkey in the value of the parameter key. If subkey == NULL, then just
check whether the parameter is defined.
*/
int
dapparamcheck(NCDAPCOMMON* nccomm, const char* key, const char* subkey)
{
const char* value;
char* p;
if(nccomm == NULL || key == NULL) return 0;
if((value=ncurifragmentlookup(nccomm->oc.url,key)) == NULL)
return 0;
if(subkey == NULL) return 1;
p = strstr(value,subkey);
if(p == NULL) return 0;
p += strlen(subkey);
if(*p != '\0' && strchr(checkseps,*p) == NULL) return 0;
return 1;
}
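/* Illustrative sketch (hypothetical key/value, not from the original source):
 * if the URL fragment defines key "log" with value "fetch+cache", then
 * dapparamcheck(nccomm,"log","fetch") returns 1 because the character after
 * the match ('+') is one of the checkseps separators, while
 * dapparamcheck(nccomm,"log","fet") returns 0.
 */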
/* Append the elements of l2 to l1; note this is NOT a set union */
int
nclistconcat(NClist* l1, NClist* l2)
{
unsigned int i;
for(i=0;i<nclistlength(l2);i++) nclistpush(l1,nclistget(l2,i));
return 1;
}
int
nclistminus(NClist* l1, NClist* l2)
{
unsigned int i,len,found;
len = nclistlength(l2);
found = 0;
for(i=0;i<len;i++) {
if(nclistdeleteall(l1,nclistget(l2,i))) found = 1;
}
return found;
}
int
nclistdeleteall(NClist* l, void* elem)
{
int i; /* do not make unsigned */
unsigned int len,found;
found = 0;
len = nclistlength(l);
for(i=len-1;i>=0;i--) {
void* test = nclistget(l,i);
if(test==elem) {
nclistremove(l,i);
found=1;
}
}
return found;
}
/* Collect the set of container nodes ending in "container"*/
void
collectnodepath(CDFnode* node, NClist* path, int withdataset)
{
if(node == NULL) return;
nclistpush(path,(void*)node);
while(node->container != NULL) {
node = node->container;
if(!withdataset && node->nctype == NC_Dataset) break;
nclistinsert(path,0,(void*)node);
}
}
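/* Illustrative sketch (assumed node layout): for a variable v contained in
 * structure s inside dataset d, collectnodepath(v,path,WITHDATASET) leaves
 * path ordered outermost-first: {d, s, v}; with withdataset == 0 the dataset
 * node is omitted, giving {s, v}.
 */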
/* Like collectnodepath, but in oc space */
void
collectocpath(OClink conn, OCddsnode node, NClist* path)
{
OCddsnode container;
OCtype octype;
if(node == NULL) return;
oc_dds_class(conn,node,&octype);
if(octype != OC_Dataset) {
oc_dds_container(conn,node,&container);
if(container != NULL)
collectocpath(conn,container,path);
}
nclistpush(path,(void*)node);
}
char*
makeocpathstring(OClink conn, OCddsnode node, const char* sep)
{
int i,len,first;
char* result;
char* name;
OCtype octype;
NClist* ocpath = NULL;
NCbytes* pathname = NULL;
/* If we are asking for the dataset path only,
then include it, otherwise elide it
*/
oc_dds_type(conn,node,&octype);
if(octype == OC_Dataset) {
oc_dds_name(conn,node,&name);
return nulldup(name);
}
ocpath = nclistnew();
collectocpath(conn,node,ocpath);
len = nclistlength(ocpath);
assert(len > 0); /* dataset at least */
pathname = ncbytesnew();
for(first=1,i=1;i<len;i++) { /* start at 1 to skip dataset name */
OCddsnode node = (OCddsnode)nclistget(ocpath,i);
char* name;
oc_dds_type(conn,node,&octype);
oc_dds_name(conn,node,&name);
if(!first) ncbytescat(pathname,sep);
ncbytescat(pathname,name);
nullfree(name);
first = 0;
}
result = ncbytesextract(pathname);
ncbytesfree(pathname);
nclistfree(ocpath);
return result;
}
char*
makepathstring(NClist* path, const char* separator, int flags)
{
int i,len,first;
NCbytes* pathname = NULL;
char* result;
CDFnode* node;
len = nclistlength(path);
ASSERT(len > 0); /* dataset at least */
if(len == 1) {/* dataset only */
node = (CDFnode*)nclistget(path,0);
return nulldup(node->ncbasename);
}
pathname = ncbytesnew();
for(first=1,i=0;i<len;i++) {
CDFnode* node = (CDFnode*)nclistget(path,i);
char* name;
if(!node->elided || (flags & PATHELIDE)==0) {
if(node->nctype != NC_Dataset) {
name = node->ncbasename;
assert(name != NULL);
if(!first) ncbytescat(pathname,separator);
ncbytescat(pathname,name);
first = 0;
}
}
}
result = ncbytesextract(pathname);
ncbytesfree(pathname);
return result;
}
/* convert path to string using the ncbasename field */
char*
makecdfpathstring(CDFnode* var, const char* separator)
{
char* spath;
NClist* path = nclistnew();
collectnodepath(var,path,WITHDATASET); /* <= note */
spath = makepathstring(path,separator,PATHNC);
nclistfree(path);
return spath;
}
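/* Illustrative sketch (assumed node layout): for a variable v inside
 * structure s, makecdfpathstring(v,".") collects the path {dataset,s,v},
 * skips the dataset node when printing, and returns "s.v" built from the
 * ncbasename fields.
 */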
/* Collect the set of names of the container nodes ending in "container" */
void
clonenodenamepath(CDFnode* node, NClist* path, int withdataset)
{
if(node == NULL) return;
/* stop at the dataset container as well*/
if(node->nctype != NC_Dataset)
clonenodenamepath(node->container,path,withdataset);
if(node->nctype != NC_Dataset || withdataset)
nclistpush(path,(void*)nulldup(node->ncbasename));
}
char*
simplepathstring(NClist* names, char* separator)
{
int i;
size_t len;
char* result;
if(nclistlength(names) == 0) return nulldup("");
for(len=0,i=0;i<nclistlength(names);i++) {
char* name = (char*)nclistget(names,i);
len += strlen(name);
len += strlen(separator);
}
len++; /* room for strlcat to null terminate */
result = (char*)malloc(len+1);
result[0] = '\0';
for(i=0;i<nclistlength(names);i++) {
char* segment = (char*)nclistget(names,i);
if(i > 0) strlcat(result,separator,len);
strlcat(result,segment,len);
}
return result;
}
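/* Illustrative sketch (assumed values): simplepathstring() over the name
 * list {"grid","temp"} with separator "." returns the malloc'd string
 * "grid.temp"; an empty list returns a copy of "".
 */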
/* Define a number of location tests */
/* Is node contained (transitively) in a sequence ? */
BOOL
dapinsequence(CDFnode* node)
{
if(node == NULL || node->container == NULL) return TRUE;
for(node=node->container;node->nctype != NC_Dataset;node=node->container) {
if(node->nctype == NC_Sequence) return TRUE;
}
return FALSE;
}
/* Is node contained (transitively) in a structure array */
BOOL
dapinstructarray(CDFnode* node)
{
if(node == NULL) return TRUE;
for(node=node->container;node->nctype != NC_Dataset;node=node->container) {
if(node->nctype == NC_Structure
&& nclistlength(node->array.dimset0) > 0)
return TRUE;
}
return FALSE;
}
/* Is node a map field of a grid? */
BOOL
dapgridmap(CDFnode* node)
{
if(node != NULL && node->container != NULL
&& node->container->nctype == NC_Grid) {
CDFnode* array = (CDFnode*)nclistget(node->container->subnodes,0);
return (node != array);
}
return FALSE;
}
/* Is node an array field of a grid? */
BOOL
dapgridarray(CDFnode* node)
{
if(node != NULL && node->container != NULL
&& node->container->nctype == NC_Grid) {
CDFnode* array = (CDFnode*)nclistget(node->container->subnodes,0);
return (node == array);
}
return FALSE;
}
BOOL
dapgridelement(CDFnode* node)
{
return dapgridarray(node)
|| dapgridmap(node);
}
/* Is node a top-level grid node? */
BOOL
daptopgrid(CDFnode* grid)
{
if(grid == NULL || grid->nctype != NC_Grid) return FALSE;
return daptoplevel(grid);
}
/* Is node a top-level sequence node? */
BOOL
daptopseq(CDFnode* seq)
{
if(seq == NULL || seq->nctype != NC_Sequence) return FALSE;
return daptoplevel(seq);
}
/* Is node a top-level node? */
BOOL
daptoplevel(CDFnode* node)
{
if(node->container == NULL
|| node->container->nctype != NC_Dataset) return FALSE;
return TRUE;
}
unsigned int
modeldecode(int translation, const char* smodel,
const struct NCTMODEL* models,
unsigned int dfalt)
{
for(;models->translation;models++) {
if(translation != models->translation) continue;
if(smodel == models->model
|| (models->model != NULL && strcasecmp(smodel,models->model)==0)) {
/* We have a match */
return models->flags;
}
}
return dfalt;
}
unsigned long
getlimitnumber(const char* limit)
{
size_t slen;
unsigned long multiplier = 1;
unsigned long lu;
if(limit == NULL) return 0;
slen = strlen(limit);
if(slen == 0) return 0;
switch (limit[slen-1]) {
case 'G': case 'g': multiplier = GIGBYTE; break;
case 'M': case 'm': multiplier = MEGBYTE; break;
case 'K': case 'k': multiplier = KILOBYTE; break;
default: break;
}
if(sscanf(limit,"%lu",&lu) != 1)
return 0;
return (lu*multiplier);
}
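/* Illustrative sketch (assumed values): getlimitnumber("10k") returns
 * 10*KILOBYTE and getlimitnumber("2M") returns 2*MEGBYTE; a string without a
 * suffix, e.g. "100", is returned unscaled.
 */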
void
dapexpandescapes(char *termstring)
{
char *s, *t, *endp;
/* expand "\" escapes, e.g. "\t" to tab character;
will only shorten string length, never increase it
*/
s = termstring;
t = termstring;
while(*t) {
if (*t == '\\') {
t++;
switch (*t) {
case 'a':
*s++ = '\007'; t++; /* will use '\a' when STDC */
break;
case 'b':
*s++ = '\b'; t++;
break;
case 'f':
*s++ = '\f'; t++;
break;
case 'n':
*s++ = '\n'; t++;
break;
case 'r':
*s++ = '\r'; t++;
break;
case 't':
*s++ = '\t'; t++;
break;
case 'v':
*s++ = '\v'; t++;
break;
case '\\':
*s++ = '\\'; t++;
break;
case '?':
*s++ = '\177'; t++;
break;
case 'x':
t++; /* now t points to one or more hex digits */
*s++ = (char) strtol(t, &endp, 16);
t = endp;
break;
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7': {
/* t should now point to 3 octal digits */
int c;
c = t[0];
if(c == 0 || c < '0' || c > '7') goto normal;
c = t[1];
if(c == 0 || c < '0' || c > '7') goto normal;
c = t[2];
if(c == 0 || c < '0' || c > '7') goto normal;
c = ((t[0]-'0')<<6)+((t[1]-'0')<<3)+(t[2]-'0');
*s++ = (char)c;
t += 3;
} break;
default:
if(*t == 0)
*s++ = '\\';
else
*s++ = *t++;
break;
}
} else {
normal: *s++ = *t++;
}
}
*s = '\0';
return;
}
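/* Illustrative sketch (assumed input): dapexpandescapes() rewrites its
 * argument in place, so a buffer holding "a\tb\x41" (with literal
 * backslashes) becomes "a<TAB>bA": "\t" collapses to a tab character and
 * "\x41" to 'A'; the string can only get shorter, never longer.
 */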
#ifdef HAVE_GETTIMEOFDAY
static double
deltatime(struct timeval time0, struct timeval time1)
{
double t0, t1;
t0 = ((double)time0.tv_sec);
t0 += ((double)time0.tv_usec) / 1000000.0;
t1 = ((double)time1.tv_sec);
t1 += ((double)time1.tv_usec) / 1000000.0;
return (t1 - t0);
}
#endif
/* Provide a wrapper for oc_fetch so we can log what it does */
NCerror
dap_fetch(NCDAPCOMMON* nccomm, OClink conn, const char* ce,
OCdxd dxd, OCddsnode* rootp)
{
NCerror ncstat = NC_NOERR;
OCerror ocstat = OC_NOERR;
char* ext = NULL;
int httpcode = 0;
OCflags ocflags = 0;
#ifdef HAVE_GETTIMEOFDAY
struct timeval time0;
struct timeval time1;
#endif
if(dxd == OCDDS) ext = ".dds";
else if(dxd == OCDAS) ext = ".das";
else ext = ".dods";
if(ce != NULL && strlen(ce) == 0)
ce = NULL;
if(FLAGSET(nccomm->controls,NCF_UNCONSTRAINABLE))
ce = NULL;
if(FLAGSET(nccomm->controls,NCF_ONDISK))
ocflags |= OCONDISK;
if(FLAGSET(nccomm->controls,NCF_ENCODE_PATH))
ocflags |= OCENCODEPATH;
if(FLAGSET(nccomm->controls,NCF_ENCODE_QUERY))
ocflags |= OCENCODEQUERY;
if(SHOWFETCH) {
/* Build uri string minus the constraint and #tag */
char* baseurl = ncuribuild(nccomm->oc.url,NULL,ext,NCURIBASE);
if(ce == NULL)
LOG1(NCLOGNOTE,"fetch: %s",baseurl);
else
LOG2(NCLOGNOTE,"fetch: %s?%s",baseurl,ce);
nullfree(baseurl);
#ifdef HAVE_GETTIMEOFDAY
gettimeofday(&time0,NULL);
#endif
}
ocstat = oc_fetch(conn,ce,dxd,ocflags,rootp);
if(FLAGSET(nccomm->controls,NCF_SHOWFETCH)) {
#ifdef HAVE_GETTIMEOFDAY
double secs;
gettimeofday(&time1,NULL);
secs = deltatime(time0,time1);
nclog(NCLOGNOTE,"fetch complete: %0.3f secs",secs);
#else
nclog(NCLOGNOTE,"fetch complete.");
#endif
}
#ifdef DEBUG2
fprintf(stderr,"fetch: dds:\n");
oc_dumpnode(conn,*rootp);
#endif
/* Look at the HTTP return code */
httpcode = oc_httpcode(conn);
if(httpcode < 400) {
ncstat = ocerrtoncerr(ocstat);
} else if(httpcode >= 500) {
ncstat = NC_EDAPSVC;
} else if(httpcode == 401) {
ncstat = NC_EACCESS;
} else if(httpcode == 403) {
ncstat = NC_EAUTH;
} else if(httpcode == 404) {
ncstat = NC_ENOTFOUND;
} else {
ncstat = NC_EACCESS;
}
return ncstat;
}
/* Check a name to see if it contains illegal dap characters
*/
static const char* baddapchars = "./";
int
dap_badname(char* name)
{
const char* p;
if(name == NULL) return 0;
for(p=baddapchars;*p;p++) {
if(strchr(name,*p) != NULL)
return 1;
}
return 0;
}
#if 0
/* Repair a dap name */
char*
dap_repairname(char* name)
{
/* assume that dap_badname was called on this name and returned 1 */
return repairname(name,baddapchars);
}
#endif
/* Check a name to see if it contains illegal dap characters
and repair them
*/
static const char* hexdigits = "0123456789abcdef";
static char*
repairname(const char* name, const char* badchars)
{
char* newname;
const char *p;
char *q;
int c;
int nnlen = 0;
if(name == NULL) return NULL;
nnlen = (3*strlen(name)); /* max needed */
nnlen++; /* room for strlcat to add nul */
newname = (char*)malloc(1+nnlen); /* max needed */
newname[0] = '\0'; /* so we can use strlcat */
for(p=name,q=newname;(c=*p);p++) {
if(strchr(badchars,c) != NULL) {
int digit;
char newchar[4];
newchar[0] = '%';
digit = (c & 0xf0) >> 4;
newchar[1] = hexdigits[digit];
digit = (c & 0x0f);
newchar[2] = hexdigits[digit];
newchar[3] = '\0';
strlcat(newname,newchar,nnlen);
q += 3; /*strlen(newchar)*/
} else
*q++ = c;
*q = '\0'; /* so we can always do strlcat */
}
*q = '\0'; /* ensure trailing null */
return newname;
}
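/* Illustrative sketch (assumed values): repairname("a/b.c","./") returns the
 * malloc'd string "a%2fb%2ec", since '/' is 0x2f, '.' is 0x2e, and each bad
 * character is replaced by its %xx hex escape.
 */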
char*
dap_getselection(NCURI* uri)
{
char* p;
char* q = uri->query;
if(q == NULL) return NULL;
p = strchr(q,'&');
if(p == NULL) return NULL;
return strdup(p+1);
}
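/* Illustrative sketch (assumed query): for a URI whose query is
 * "lat,lon&lat>0", dap_getselection() returns a strdup'd "lat>0", i.e.
 * everything after the first '&'; it returns NULL when there is no '&'.
 */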
/* Compute padding */
static int
nccpadding(unsigned long offset, int alignment)
{
int pad,rem;
rem = (alignment==0?0:(offset % alignment));
pad = (rem==0?0:(alignment - rem));
return pad;
}
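/* Illustrative sketch (assumed values): nccpadding(6,4) == 2 and
 * nccpadding(8,4) == 0, i.e. the result is the number of bytes needed to
 * round offset up to the next multiple of alignment (0 when alignment is 0).
 */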
int
dapparamparselist(const char* s0, int delim, NClist* list)
{
    int stat = NC_NOERR;
    char* s = NULL;
    char* p;
    int i,count = 1;
    /* Check the argument before copying it to avoid strdup(NULL) */
    if(s0 == NULL || strlen(s0) == 0) goto done;
    s = strdup(s0);
    for(p=s;*p;p++) {if(*p == delim) {*p = '\0'; count++;}}
    for(i=0,p=s;i<count;i++,p+=(strlen(p)+1)) {
	if(strlen(p)>0)
	    nclistpush(list,strdup(p));
    }
done:
    nullfree(s); /* reclaim the working copy */
    return stat;
}
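/* Illustrative sketch (assumed values): dapparamparselist("a,b,,c",',',list)
 * pushes the strings "a", "b" and "c" onto list; empty segments are skipped.
 */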