2012-08-09 07:15:18 +08:00
|
|
|
/*********************************************************************
|
2018-12-07 05:29:57 +08:00
|
|
|
* Copyright 2018, UCAR/Unidata
|
2012-08-09 07:15:18 +08:00
|
|
|
* See netcdf/COPYRIGHT file for copying and redistribution conditions.
|
|
|
|
* $Header$
|
|
|
|
*********************************************************************/
|
2012-09-28 06:50:41 +08:00
|
|
|
#include "config.h"
|
2012-08-09 07:15:18 +08:00
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include <stdio.h>
|
|
|
|
#include <assert.h>
|
|
|
|
|
|
|
|
#include "ncuri.h"
|
2017-03-09 08:01:10 +08:00
|
|
|
#include "ncbytes.h"
|
|
|
|
#include "nclist.h"
|
2012-08-09 07:15:18 +08:00
|
|
|
|
2014-08-02 00:47:20 +08:00
|
|
|
/* Include netcdf.h to allow access to
|
|
|
|
NC_ error return codes. */
|
|
|
|
#include "netcdf.h"
|
|
|
|
|
2012-08-09 07:15:18 +08:00
|
|
|
/* Debug switch: while NCURIDEBUG is defined, the "#ifdef NCURIDEBUG"
   branch further down this file is the one that defines THROW. */
#define NCURIDEBUG
|
|
|
|
|
2017-07-06 04:39:23 +08:00
|
|
|
/* Extra debug info; NCXDEBUG is explicitly left undefined here */
|
|
|
|
#undef NCXDEBUG
|
|
|
|
|
2012-08-09 07:15:18 +08:00
|
|
|
/*
 * THROW(n): record the error code n in the enclosing function's local
 * variable `ret` and jump to that function's `done:` label.
 *
 * Both variants must store the code: a variant that only jumps would
 * reach `done:` with `ret` still holding its initial value (NC_NOERR in
 * ncuriparse), so a failed parse would be reported as success.
 * The #ifdef is kept so a debug-only hook can be added to the first
 * variant without touching the callers.
 *
 * The expansion is a brace block (not do/while) so existing uses written
 * as "{THROW(x);}" keep compiling unchanged.
 */
#ifdef NCURIDEBUG
#define THROW(n) {ret=(n); goto done;}
#else
#define THROW(n) {ret=(n); goto done;}
#endif
|
|
|
|
|
|
|
|
/* NOTE(review): PADDING is not referenced in the visible part of this file;
   confirm where it is used before changing it. */
#define PADDING 8
|
|
|
|
|
|
|
|
/* Opening delimiter of a legacy bracketed "[...]" client parameter;
   ncuriparse tests the first character of the url against it. */
#define LBRACKET '['
|
|
|
|
/* Closing delimiter matching LBRACKET */
#define RBRACKET ']'
|
|
|
|
/* NUL character; the terminate() macro stores it to end a string */
#define EOFCHAR '\0'
|
2017-03-09 08:01:10 +08:00
|
|
|
/* RBRACKET as a string literal */
#define RBRACKETSTR "]"
|
|
|
|
|
|
|
|
/* Characters accepted as a Windows drive letter when ncuriparse
   examines the path of a file: url */
#define DRIVELETTERS "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
2012-08-09 07:15:18 +08:00
|
|
|
|
|
|
|
/* The three helpers below are each defined only if no earlier definition
   exists. Each one evaluates its argument more than once, so do not pass
   an expression with side effects. */

/* FIX(s): yields the literal "NULL" when s is a null pointer, else s itself */
#ifndef FIX
|
|
|
|
#define FIX(s) ((s)==NULL?"NULL":(s))
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* NILLEN(s): strlen(s), except that a null pointer counts as length 0 */
#ifndef NILLEN
|
|
|
|
#define NILLEN(s) ((s)==NULL?0:strlen(s))
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* nulldup(s): strdup(s), except that a null pointer is passed through as NULL */
#ifndef nulldup
|
|
|
|
#define nulldup(s) ((s)==NULL?NULL:strdup(s))
|
|
|
|
#endif
|
|
|
|
|
|
|
|
/* terminate(p): store the NUL character (EOFCHAR) at *p, ending the string there */
#define terminate(p) {*(p) = EOFCHAR;}

/* endof(p): pointer to the terminating NUL of the string p (p is evaluated twice) */
#define endof(p) ((p)+strlen(p))

/*
 * lshift(buf,buflen): move buflen+1 bytes starting at buf+1 down to buf,
 *   i.e. drop the first byte of buf.
 * rshift(buf,buflen): move buflen+1 bytes starting at buf up to buf+1,
 *   i.e. open a one-byte gap at buf; the caller must own one spare byte
 *   after the moved region.
 * memmove is used because source and destination overlap.
 * Every parameter is parenthesized so that an expression argument
 * (e.g. "cond ? a : b" or "n - 1") expands with the intended grouping;
 * buf is evaluated twice, so it must not have side effects.
 */
#define lshift(buf,buflen) {memmove((buf),(buf)+1,(buflen)+1);}
#define rshift(buf,buflen) {memmove((buf)+1,(buf),(buflen)+1);}
|
2012-08-09 07:15:18 +08:00
|
|
|
|
|
|
|
/* Allowable character sets for encode */
|
2020-09-09 02:41:12 +08:00
|
|
|
|
|
|
|
/* ascii = " !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~" */
|
|
|
|
|
|
|
|
/* Classes according to the URL RFC */
|
|
|
|
#define RFCRESERVED " !*'();:@&=+$,/?#[]"
|
|
|
|
#define RFCUNRESERVED "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_.~"
|
|
|
|
#define RFCOTHER "\"%<>\\^`{|}"
|
|
|
|
|
|
|
|
/* I really hate the URL encoding mess */
|
|
|
|
|
2019-03-31 04:06:20 +08:00
|
|
|
/* Character set for the path part of a url.
   NOTE(review): presumably these are the characters left unescaped when
   encoding; the code that consumes this table is not visible here. */
static const char* pathallow =
|
2012-08-09 07:15:18 +08:00
|
|
|
"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!#$&'()*+,-./:;=?@_~";
|
|
|
|
|
2019-03-31 04:06:20 +08:00
|
|
|
/* Character set for the query part of a url; currently holds exactly
   the same characters as pathallow */
static const char* queryallow =
|
2012-08-09 07:15:18 +08:00
|
|
|
"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!#$&'()*+,-./:;=?@_~";
|
|
|
|
|
2017-08-31 09:13:52 +08:00
|
|
|
/* user+pwd allow = path allow - "@:" (same characters as pathallow with
   '@' and ':' removed; the order of the punctuation differs) */
|
2019-03-31 04:06:20 +08:00
|
|
|
static const char* userpwdallow =
|
2017-08-31 09:13:52 +08:00
|
|
|
"0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!$&'()*+,-.;=_~?#/";
|
2017-08-30 04:11:15 +08:00
|
|
|
|
2019-01-11 23:46:05 +08:00
|
|
|
#ifndef HAVE_STRNDUP
|
2014-08-08 07:03:27 +08:00
|
|
|
#define strndup ncstrndup
|
|
|
|
/* Not all systems have strndup, so provide one*/
|
|
|
|
/**
 * Duplicate at most len characters of the string s.
 *
 * Copying stops at the first NUL in s or after len characters, whichever
 * comes first, so s is never read beyond its terminator even when len is
 * larger than the string. The result is always NUL terminated.
 *
 * @param s   string to copy; may be NULL
 * @param len maximum number of characters to copy
 * @return newly malloc'd copy that the caller must free, or NULL if s is
 *         NULL or the allocation fails
 */
char*
ncstrndup(const char* s, size_t len)
{
    char* dup;
    size_t n;

    if(s == NULL) return NULL;
    /* Bounded length scan: min(strlen(s),len) without touching bytes past the NUL */
    for(n=0; n<len && s[n] != '\0'; n++)
        ;
    dup = (char*)malloc(n+1);
    if(dup == NULL) return NULL;
    memcpy((void*)dup,s,n);
    dup[n] = '\0';
    return dup;
}
|
|
|
|
#endif
|
2012-08-09 07:15:18 +08:00
|
|
|
/* Forward */
|
2017-03-09 08:01:10 +08:00
|
|
|
static int collectprefixparams(char* text, char** nextp);
|
|
|
|
static void freestringlist(NClist* list);
|
|
|
|
static void freestringvec(char** list);
|
2012-08-09 07:15:18 +08:00
|
|
|
static int ncfind(char** params, const char* key);
|
|
|
|
static char* nclocate(char* p, const char* charlist);
|
2019-09-30 02:59:28 +08:00
|
|
|
static int parselist(const char* ptext, NClist* list);
|
This PR adds EXPERIMENTAL support for accessing data in the
cloud using a variant of the Zarr protocol and storage
format. This enhancement is generically referred to as "NCZarr".
The data model supported by NCZarr is netcdf-4 minus the user-defined
types and the String type. In this sense it is similar to the CDF-5
data model.
More detailed information about enabling and using NCZarr is
described in the document NUG/nczarr.md and in a
[Unidata Developer's blog entry](https://www.unidata.ucar.edu/blogs/developer/en/entry/overview-of-zarr-support-in).
WARNING: this code has had limited testing, so do not use this version
for production work. Also, performance improvements are ongoing.
Note especially the following platform matrix of successful tests:
Platform | Build System | S3 support
------------------------------------
Linux+gcc | Automake | yes
Linux+gcc | CMake | yes
Visual Studio | CMake | no
Additionally, and as a consequence of the addition of NCZarr,
major changes have been made to the Filter API. NOTE: NCZarr
does not yet support filters, but these changes are enablers for
that support in the future. Note that it is possible
(probable?) that there will be some accidental reversions if the
changes here did not correctly mimic the existing filter testing.
In any case, previously filter ids and parameters were of type
unsigned int. In order to support the more general zarr filter
model, this was all converted to char*. The old HDF5-specific,
unsigned int operations are still supported but they are
wrappers around the new, char* based nc_filterx_XXX functions.
This entailed at least the following changes:
1. Added the files libdispatch/dfilterx.c and include/ncfilter.h
2. Some filterx utilities have been moved to libdispatch/daux.c
3. A new entry, "filter_actions" was added to the NCDispatch table
and the version bumped.
4. An overly complex set of structs was created to support funnelling
all of the filterx operations thru a single dispatch
"filter_actions" entry.
5. Move common code from libhdf5 to libsrc4 so that it is accessible
to nczarr.
Changes directly related to Zarr:
1. Modified CMakeList.txt and configure.ac to support both C and C++
-- this is in support of S3 support via the aws-sdk libraries.
2. Define a size64_t type to support nczarr.
3. More reworking of libdispatch/dinfermodel.c to
support zarr and to regularize the structure of the fragments
section of a URL.
Changes not directly related to Zarr:
1. Make client-side filter registration be conditional, with default off.
2. Hack include/nc4internal.h to make some flags added by Ed be unique:
e.g. NC_CREAT, NC_INDEF, etc.
3. cleanup include/nchttp.h and libdispatch/dhttp.c.
4. Misc. changes to support compiling under Visual Studio including:
* Better testing under windows for dirent.h and opendir and closedir.
5. Misc. changes to the oc2 code to support various libcurl CURLOPT flags
and to centralize error reporting.
6. By default, suppress the vlen tests that have unfixed memory leaks; add option to enable them.
7. Make part of the nc_test/test_byterange.sh test be contingent on remotetest.unidata.ucar.edu being accessible.
Changes Left TO-DO:
1. fix provenance code, it is too HDF5 specific.
2020-06-29 08:02:47 +08:00
|
|
|
static int unparselist(const char** vec, const char* prefix, int encode, char** svecp);
|
|
|
|
static int ensurefraglist(NCURI* uri);
|
|
|
|
static int ensurequerylist(NCURI* uri);
|
|
|
|
static void buildlist(const char** list, int encode, NCbytes* buf);
|
|
|
|
static void removedups(NClist* list);
|
|
|
|
static int extendenvv(char*** envvp, int amount, int* oldlen);
|
2017-03-09 08:01:10 +08:00
|
|
|
|
|
|
|
/**************************************************/
|
|
|
|
/*
|
|
|
|
A note about parameter support:
|
|
|
|
In the original url format for opendap (dap2), client parameters were
|
|
|
|
assumed to be one or more instances of bracketed pairs: e.g
|
|
|
|
"[...][...]...".
|
|
|
|
These were assumed to be placed at the front of the url. In this newer
|
|
|
|
version, the parameters may be encoded after a trailing # character each
|
|
|
|
separated by ampersand (&). For back compatibility, the bracketed
|
|
|
|
parameter form is supported. However, if ncuribuild is used, all
|
|
|
|
parameters will be converted to the
|
|
|
|
#...&...& format.
|
|
|
|
In any case, each parameter in turn is assumed to be of the form
|
|
|
|
<name>=<value> or <name>; e.g. #x=y&z&a=b&w.
|
|
|
|
If the same parameter is specified more than once, then the first
|
|
|
|
occurrence is used; this is so that it is possible to forcibly override
|
|
|
|
user specified parameters by prefixing.
|
|
|
|
IMPORTANT: the client parameter string is assumed to have blanks compressed out.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/**************************************************/
|
2012-08-09 07:15:18 +08:00
|
|
|
|
2019-09-30 02:59:28 +08:00
|
|
|
/* Do a simple uri parse: return NC_NOERR if success, NC_EXXX if failed */
|
2012-08-09 07:15:18 +08:00
|
|
|
int
|
2012-08-20 05:54:30 +08:00
|
|
|
ncuriparse(const char* uri0, NCURI** durip)
|
2012-08-09 07:15:18 +08:00
|
|
|
{
|
2019-09-30 02:59:28 +08:00
|
|
|
int ret = NC_NOERR;
|
2017-03-09 08:01:10 +08:00
|
|
|
NCURI tmp;
|
2012-08-09 07:15:18 +08:00
|
|
|
char* p;
|
2017-03-09 08:01:10 +08:00
|
|
|
char* q;
|
|
|
|
int isfile;
|
|
|
|
int hashost;
|
|
|
|
char* uri = NULL;
|
|
|
|
NCURI* duri = NULL;
|
|
|
|
char* prefix = NULL;
|
|
|
|
char* next = NULL;
|
|
|
|
NClist* params = nclistnew();
|
|
|
|
NClist* querylist = nclistnew();
|
|
|
|
size_t len0;
|
|
|
|
int pathchar;
|
|
|
|
|
2019-10-25 06:37:52 +08:00
|
|
|
tmp.fraglist = NULL;
|
|
|
|
tmp.querylist = NULL;
|
|
|
|
|
2017-03-09 08:01:10 +08:00
|
|
|
if(uri0 == NULL)
|
2019-09-30 02:59:28 +08:00
|
|
|
{THROW(NC_EURL);}
|
2017-03-09 08:01:10 +08:00
|
|
|
|
|
|
|
len0 = strlen(uri0);
|
|
|
|
if(len0 == 0)
|
2019-09-30 02:59:28 +08:00
|
|
|
{THROW(NC_EURL);}
|
2017-03-09 08:01:10 +08:00
|
|
|
|
|
|
|
/* Create a local NCURI instance to hold
|
|
|
|
pointers into the parsed string
|
|
|
|
*/
|
|
|
|
memset(&tmp,0,sizeof(tmp));
|
2012-08-09 07:15:18 +08:00
|
|
|
|
2017-03-09 08:01:10 +08:00
|
|
|
/* make mutable copy. Add some extra space
|
|
|
|
because we will need to null terminate the host section
|
|
|
|
without losing the first character of the path section.
|
|
|
|
*/
|
|
|
|
uri = (char*)malloc(len0+1+1); /* +2 for nul term and for host section terminator */
|
2012-08-09 07:15:18 +08:00
|
|
|
if(uri == NULL)
|
2019-09-30 02:59:28 +08:00
|
|
|
{THROW(NC_ENOMEM);}
|
2017-03-09 08:01:10 +08:00
|
|
|
strncpy(uri,uri0,len0+1);
|
2012-08-09 07:15:18 +08:00
|
|
|
|
|
|
|
/* Walk the uri and do the following:
|
2017-03-09 08:01:10 +08:00
|
|
|
1. remove leading and trailing whitespace
|
|
|
|
2. convert all '\\' -> '\' (Temp hack to remove escape characters
|
|
|
|
inserted by Windows or MinGW)
|
2012-08-09 07:15:18 +08:00
|
|
|
*/
|
2017-03-09 08:01:10 +08:00
|
|
|
for(q=uri,p=uri;*p;p++) {if((*p == '\\' && p[1] == '\\') || *p < ' ') {continue;} else {*q++ = *p;}}
|
|
|
|
*q = '\0';
|
2012-08-09 07:15:18 +08:00
|
|
|
|
|
|
|
p = uri;
|
|
|
|
|
2017-03-09 08:01:10 +08:00
|
|
|
/* break up the url into coarse pieces */
|
2012-08-09 07:15:18 +08:00
|
|
|
if(*p == LBRACKET) {
|
2017-03-09 08:01:10 +08:00
|
|
|
prefix = p;
|
This PR adds EXPERIMENTAL support for accessing data in the
cloud using a variant of the Zarr protocol and storage
format. This enhancement is generically referred to as "NCZarr".
The data model supported by NCZarr is netcdf-4 minus the user-defined
types and the String type. In this sense it is similar to the CDF-5
data model.
More detailed information about enabling and using NCZarr is
described in the document NUG/nczarr.md and in a
[Unidata Developer's blog entry](https://www.unidata.ucar.edu/blogs/developer/en/entry/overview-of-zarr-support-in).
WARNING: this code has had limited testing, so do not use this version
for production work. Also, performance improvements are ongoing.
Note especially the following platform matrix of successful tests:
Platform | Build System | S3 support
------------------------------------
Linux+gcc | Automake | yes
Linux+gcc | CMake | yes
Visual Studio | CMake | no
Additionally, and as a consequence of the addition of NCZarr,
major changes have been made to the Filter API. NOTE: NCZarr
does not yet support filters, but these changes are enablers for
that support in the future. Note that it is possible
(probable?) that there will be some accidental reversions if the
changes here did not correctly mimic the existing filter testing.
In any case, previously filter ids and parameters were of type
unsigned int. In order to support the more general zarr filter
model, this was all converted to char*. The old HDF5-specific,
unsigned int operations are still supported but they are
wrappers around the new, char* based nc_filterx_XXX functions.
This entailed at least the following changes:
1. Added the files libdispatch/dfilterx.c and include/ncfilter.h
2. Some filterx utilities have been moved to libdispatch/daux.c
3. A new entry, "filter_actions" was added to the NCDispatch table
and the version bumped.
4. An overly complex set of structs was created to support funnelling
all of the filterx operations thru a single dispatch
"filter_actions" entry.
5. Move common code to from libhdf5 to libsrc4 so that it is accessible
to nczarr.
Changes directly related to Zarr:
1. Modified CMakeList.txt and configure.ac to support both C and C++
-- this is in support of S3 support via the awd-sdk libraries.
2. Define a size64_t type to support nczarr.
3. More reworking of libdispatch/dinfermodel.c to
support zarr and to regularize the structure of the fragments
section of a URL.
Changes not directly related to Zarr:
1. Make client-side filter registration be conditional, with default off.
2. Hack include/nc4internal.h to make some flags added by Ed be unique:
e.g. NC_CREAT, NC_INDEF, etc.
3. cleanup include/nchttp.h and libdispatch/dhttp.c.
4. Misc. changes to support compiling under Visual Studio including:
* Better testing under windows for dirent.h and opendir and closedir.
5. Misc. changes to the oc2 code to support various libcurl CURLOPT flags
and to centralize error reporting.
6. By default, suppress the vlen tests that have unfixed memory leaks; add option to enable them.
7. Make part of the nc_test/test_byterange.sh test be contingent on remotetest.unidata.ucar.edu being accessible.
Changes Left TO-DO:
1. fix provenance code, it is too HDF5 specific.
2020-06-29 08:02:47 +08:00
|
|
|
ret = collectprefixparams(p,&next); /* collect the prefix; convert to & form */
|
2019-09-30 02:59:28 +08:00
|
|
|
if(ret != NC_NOERR)
|
|
|
|
{THROW(NC_EURL);}
|
2017-03-09 08:01:10 +08:00
|
|
|
p = next;
|
|
|
|
} else {
|
|
|
|
prefix = NULL;
|
|
|
|
}
|
|
|
|
tmp.uri = p; /* will be the core */
|
|
|
|
/* Skip past the core of the url */
|
|
|
|
next = nclocate(p,"?#");
|
|
|
|
if(next != NULL) {
|
|
|
|
int c = *next;
|
|
|
|
terminate(next);
|
|
|
|
next++;
|
|
|
|
if(c == '?') {
|
|
|
|
tmp.query = next;
|
|
|
|
next = nclocate(next,"#");
|
|
|
|
if(next == NULL)
|
|
|
|
tmp.fragment = NULL;
|
|
|
|
else {
|
|
|
|
terminate(next);
|
|
|
|
next++;
|
|
|
|
tmp.fragment = next;
|
|
|
|
}
|
|
|
|
} else { /*c == '#'*/
|
|
|
|
tmp.fragment = next;
|
2018-06-09 02:08:33 +08:00
|
|
|
}
|
2017-03-09 08:01:10 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Parse the prefix parameters */
|
|
|
|
if(prefix != NULL) {
|
2019-09-30 02:59:28 +08:00
|
|
|
if(parselist(prefix,params) != NC_NOERR)
|
|
|
|
{THROW(NC_EURL);}
|
2017-03-09 08:01:10 +08:00
|
|
|
}
|
This PR adds EXPERIMENTAL support for accessing data in the
cloud using a variant of the Zarr protocol and storage
format. This enhancement is generically referred to as "NCZarr".
The data model supported by NCZarr is netcdf-4 minus the user-defined
types and the String type. In this sense it is similar to the CDF-5
data model.
More detailed information about enabling and using NCZarr is
described in the document NUG/nczarr.md and in a
[Unidata Developer's blog entry](https://www.unidata.ucar.edu/blogs/developer/en/entry/overview-of-zarr-support-in).
WARNING: this code has had limited testing, so do not use this version
for production work. Also, performance improvements are ongoing.
Note especially the following platform matrix of successful tests:
Platform | Build System | S3 support
------------------------------------
Linux+gcc | Automake | yes
Linux+gcc | CMake | yes
Visual Studio | CMake | no
Additionally, and as a consequence of the addition of NCZarr,
major changes have been made to the Filter API. NOTE: NCZarr
does not yet support filters, but these changes are enablers for
that support in the future. Note that it is possible
(probable?) that there will be some accidental reversions if the
changes here did not correctly mimic the existing filter testing.
In any case, previously filter ids and parameters were of type
unsigned int. In order to support the more general zarr filter
model, this was all converted to char*. The old HDF5-specific,
unsigned int operations are still supported but they are
wrappers around the new, char* based nc_filterx_XXX functions.
This entailed at least the following changes:
1. Added the files libdispatch/dfilterx.c and include/ncfilter.h
2. Some filterx utilities have been moved to libdispatch/daux.c
3. A new entry, "filter_actions" was added to the NCDispatch table
and the version bumped.
4. An overly complex set of structs was created to support funnelling
all of the filterx operations thru a single dispatch
"filter_actions" entry.
5. Move common code to from libhdf5 to libsrc4 so that it is accessible
to nczarr.
Changes directly related to Zarr:
1. Modified CMakeList.txt and configure.ac to support both C and C++
-- this is in support of S3 support via the awd-sdk libraries.
2. Define a size64_t type to support nczarr.
3. More reworking of libdispatch/dinfermodel.c to
support zarr and to regularize the structure of the fragments
section of a URL.
Changes not directly related to Zarr:
1. Make client-side filter registration be conditional, with default off.
2. Hack include/nc4internal.h to make some flags added by Ed be unique:
e.g. NC_CREAT, NC_INDEF, etc.
3. cleanup include/nchttp.h and libdispatch/dhttp.c.
4. Misc. changes to support compiling under Visual Studio including:
* Better testing under windows for dirent.h and opendir and closedir.
5. Misc. changes to the oc2 code to support various libcurl CURLOPT flags
and to centralize error reporting.
6. By default, suppress the vlen tests that have unfixed memory leaks; add option to enable them.
7. Make part of the nc_test/test_byterange.sh test be contingent on remotetest.unidata.ucar.edu being accessible.
Changes Left TO-DO:
1. fix provenance code, it is too HDF5 specific.
2020-06-29 08:02:47 +08:00
|
|
|
/* Parse the fragment parameters into the params list */
|
2017-03-09 08:01:10 +08:00
|
|
|
if(tmp.fragment != NULL) {
|
2019-09-30 02:59:28 +08:00
|
|
|
if(parselist(tmp.fragment,params) != NC_NOERR)
|
|
|
|
{THROW(NC_EURL);}
|
2017-03-09 08:01:10 +08:00
|
|
|
}
|
This PR adds EXPERIMENTAL support for accessing data in the
cloud using a variant of the Zarr protocol and storage
format. This enhancement is generically referred to as "NCZarr".
The data model supported by NCZarr is netcdf-4 minus the user-defined
types and the String type. In this sense it is similar to the CDF-5
data model.
More detailed information about enabling and using NCZarr is
described in the document NUG/nczarr.md and in a
[Unidata Developer's blog entry](https://www.unidata.ucar.edu/blogs/developer/en/entry/overview-of-zarr-support-in).
WARNING: this code has had limited testing, so do not use this version
for production work. Also, performance improvements are ongoing.
Note especially the following platform matrix of successful tests:
Platform | Build System | S3 support
------------------------------------
Linux+gcc | Automake | yes
Linux+gcc | CMake | yes
Visual Studio | CMake | no
Additionally, and as a consequence of the addition of NCZarr,
major changes have been made to the Filter API. NOTE: NCZarr
does not yet support filters, but these changes are enablers for
that support in the future. Note that it is possible
(probable?) that there will be some accidental reversions if the
changes here did not correctly mimic the existing filter testing.
In any case, previously filter ids and parameters were of type
unsigned int. In order to support the more general zarr filter
model, this was all converted to char*. The old HDF5-specific,
unsigned int operations are still supported but they are
wrappers around the new, char* based nc_filterx_XXX functions.
This entailed at least the following changes:
1. Added the files libdispatch/dfilterx.c and include/ncfilter.h
2. Some filterx utilities have been moved to libdispatch/daux.c
3. A new entry, "filter_actions" was added to the NCDispatch table
and the version bumped.
4. An overly complex set of structs was created to support funnelling
all of the filterx operations thru a single dispatch
"filter_actions" entry.
5. Move common code to from libhdf5 to libsrc4 so that it is accessible
to nczarr.
Changes directly related to Zarr:
1. Modified CMakeList.txt and configure.ac to support both C and C++
-- this is in support of S3 support via the awd-sdk libraries.
2. Define a size64_t type to support nczarr.
3. More reworking of libdispatch/dinfermodel.c to
support zarr and to regularize the structure of the fragments
section of a URL.
Changes not directly related to Zarr:
1. Make client-side filter registration be conditional, with default off.
2. Hack include/nc4internal.h to make some flags added by Ed be unique:
e.g. NC_CREAT, NC_INDEF, etc.
3. cleanup include/nchttp.h and libdispatch/dhttp.c.
4. Misc. changes to support compiling under Visual Studio including:
* Better testing under windows for dirent.h and opendir and closedir.
5. Misc. changes to the oc2 code to support various libcurl CURLOPT flags
and to centralize error reporting.
6. By default, suppress the vlen tests that have unfixed memory leaks; add option to enable them.
7. Make part of the nc_test/test_byterange.sh test be contingent on remotetest.unidata.ucar.edu being accessible.
Changes Left TO-DO:
1. fix provenance code, it is too HDF5 specific.
2020-06-29 08:02:47 +08:00
|
|
|
/* Remove duplicates */
|
|
|
|
removedups(params);
|
2017-03-09 08:01:10 +08:00
|
|
|
if(nclistlength(params) > 0) {
|
|
|
|
nclistpush(params,NULL);
|
|
|
|
tmp.fraglist = nclistextract(params);
|
|
|
|
} else
|
|
|
|
tmp.fraglist = NULL;
|
|
|
|
/* Parse the query */
|
|
|
|
if(tmp.query != NULL) {
|
2019-09-30 02:59:28 +08:00
|
|
|
if(parselist(tmp.query,querylist) != NC_NOERR)
|
|
|
|
{THROW(NC_EURL);}
|
2017-03-09 08:01:10 +08:00
|
|
|
if(nclistlength(querylist) > 0) {
|
|
|
|
nclistpush(querylist,NULL);
|
|
|
|
tmp.querylist = nclistextract(querylist);
|
2012-08-09 07:15:18 +08:00
|
|
|
}
|
2018-06-09 02:08:33 +08:00
|
|
|
}
|
2017-03-09 08:01:10 +08:00
|
|
|
|
|
|
|
/* Now parse the core of the url */
|
|
|
|
p = tmp.uri;
|
2012-08-09 07:15:18 +08:00
|
|
|
|
2017-03-09 08:01:10 +08:00
|
|
|
/* Mark the protocol */
|
|
|
|
tmp.protocol = p;
|
2012-08-09 07:15:18 +08:00
|
|
|
p = strchr(p,':');
|
|
|
|
if(!p)
|
2019-09-30 02:59:28 +08:00
|
|
|
{THROW(NC_EURL);}
|
2012-08-09 07:15:18 +08:00
|
|
|
terminate(p); /*overwrite colon*/
|
|
|
|
p++; /* skip the colon */
|
2017-03-09 08:01:10 +08:00
|
|
|
if(strlen(tmp.protocol)==0)
|
2019-09-30 02:59:28 +08:00
|
|
|
{THROW(NC_EURL);}
|
2017-03-09 08:01:10 +08:00
|
|
|
/*
|
|
|
|
The legal formats for file: urls are a problem since
|
|
|
|
many variants are often accepted.
|
|
|
|
By RFC, the proper general format is: file://host/path,
|
|
|
|
where the 'host' can be omitted and defaults to 'localhost'.
|
|
|
|
and the path includes the leading '/'.
|
|
|
|
So, assuming no host, the format is: "file:///path".
|
|
|
|
Some implementations, however, ignore the host, and allow
|
|
|
|
the format: file:/path.
|
|
|
|
We also simplify things by assuming the host part is always empty.
|
|
|
|
which means we can have file:///path, but not file://..../path.
|
2019-09-30 02:59:28 +08:00
|
|
|
Note also in all cases, the leading '/' is considered part of the path,
|
2017-03-09 08:01:10 +08:00
|
|
|
which is then assumed to be an absolute path. But also note that
|
|
|
|
the windows drive letter has to be taken into account. Our rule is that
|
2018-06-09 02:08:33 +08:00
|
|
|
if the path looks like D:...,
|
2017-03-09 08:01:10 +08:00
|
|
|
where D is a single alphabetic letter (a-z or A-Z),
|
|
|
|
then it is a windows path and can be use in place of a /path.
|
2019-09-30 02:59:28 +08:00
|
|
|
Note also that it is desirable to support relative paths even
|
|
|
|
though the RFC technically does not allow this. This will occur
|
|
|
|
if the form is file://path where path does not start with '/'.
|
2017-03-09 08:01:10 +08:00
|
|
|
The rules implemented here (for file:) are then as follows
|
|
|
|
1. file:D:... : assume D: is a windows drive letter and treat D:... as the path
|
|
|
|
2. file:/X, where X does not start with a slash: treat /X as the path.
|
|
|
|
3. file://D:... : assume D: is a windows drive letter and treat as the path
|
|
|
|
4. file:///X, where X does not start with a slash: treat /X as the path.
|
2019-09-30 02:59:28 +08:00
|
|
|
5. file://X, where X does not start with a slash: treat X as the
|
|
|
|
relative path.
|
|
|
|
All other cases are disallowed.
|
2017-03-09 08:01:10 +08:00
|
|
|
*/
|
2012-08-09 07:15:18 +08:00
|
|
|
|
2017-03-09 08:01:10 +08:00
|
|
|
isfile = (strcmp(tmp.protocol,"file")==0);
|
|
|
|
if(isfile) {
|
2019-01-12 02:41:09 +08:00
|
|
|
size_t l = strlen(p); /* to test if we have enough characters */
|
2017-03-09 08:01:10 +08:00
|
|
|
hashost = 0; /* always */
|
|
|
|
if(l >= 2 && p[1] == ':' && strchr(DRIVELETTERS,p[0]) != NULL) { /* case 1 */
|
2018-07-06 20:52:02 +08:00
|
|
|
; /* p points to the start of the path */
|
2017-03-09 08:01:10 +08:00
|
|
|
} else if(l >= 2 && p[0] == '/' && p[1] != '/') { /* case 2 */
|
2018-07-06 20:52:02 +08:00
|
|
|
; /* p points to the start of the path */
|
2017-03-09 08:01:10 +08:00
|
|
|
} else if(l >= 4 && p[0] == '/' && p[1] == '/'
|
|
|
|
&& p[3] == ':' && strchr(DRIVELETTERS,p[2]) != NULL) { /* case 3 */
|
|
|
|
p = p+2; /* points to the start of the windows path */
|
|
|
|
} else if(l >= 4 && p[0] == '/' && p[1] == '/' && p[2] == '/' && p[3] != '/') { /* case 4 */
|
|
|
|
p += 2; /* points to the start of the path */
|
2019-09-30 02:59:28 +08:00
|
|
|
} else if(l >= 4 && p[0] == '/' && p[1] == '/' && p[2] != '/') { /* case 5 */
|
|
|
|
p += 2; /* points to the start of the path */
|
2017-03-09 08:01:10 +08:00
|
|
|
} else /* everything else is illegal */
|
This PR adds EXPERIMENTAL support for accessing data in the
cloud using a variant of the Zarr protocol and storage
format. This enhancement is generically referred to as "NCZarr".
The data model supported by NCZarr is netcdf-4 minus the user-defined
types and the String type. In this sense it is similar to the CDF-5
data model.
More detailed information about enabling and using NCZarr is
described in the document NUG/nczarr.md and in a
[Unidata Developer's blog entry](https://www.unidata.ucar.edu/blogs/developer/en/entry/overview-of-zarr-support-in).
WARNING: this code has had limited testing, so do not use this version
for production work. Also, performance improvements are ongoing.
Note especially the following platform matrix of successful tests:
Platform | Build System | S3 support
------------------------------------
Linux+gcc | Automake | yes
Linux+gcc | CMake | yes
Visual Studio | CMake | no
Additionally, and as a consequence of the addition of NCZarr,
major changes have been made to the Filter API. NOTE: NCZarr
does not yet support filters, but these changes are enablers for
that support in the future. Note that it is possible
(probable?) that there will be some accidental reversions if the
changes here did not correctly mimic the existing filter testing.
In any case, previously filter ids and parameters were of type
unsigned int. In order to support the more general zarr filter
model, this was all converted to char*. The old HDF5-specific,
unsigned int operations are still supported but they are
wrappers around the new, char* based nc_filterx_XXX functions.
This entailed at least the following changes:
1. Added the files libdispatch/dfilterx.c and include/ncfilter.h
2. Some filterx utilities have been moved to libdispatch/daux.c
3. A new entry, "filter_actions" was added to the NCDispatch table
and the version bumped.
4. An overly complex set of structs was created to support funnelling
all of the filterx operations thru a single dispatch
"filter_actions" entry.
5. Move common code to from libhdf5 to libsrc4 so that it is accessible
to nczarr.
Changes directly related to Zarr:
1. Modified CMakeList.txt and configure.ac to support both C and C++
-- this is in support of S3 support via the awd-sdk libraries.
2. Define a size64_t type to support nczarr.
3. More reworking of libdispatch/dinfermodel.c to
support zarr and to regularize the structure of the fragments
section of a URL.
Changes not directly related to Zarr:
1. Make client-side filter registration be conditional, with default off.
2. Hack include/nc4internal.h to make some flags added by Ed be unique:
e.g. NC_CREAT, NC_INDEF, etc.
3. cleanup include/nchttp.h and libdispatch/dhttp.c.
4. Misc. changes to support compiling under Visual Studio including:
* Better testing under windows for dirent.h and opendir and closedir.
5. Misc. changes to the oc2 code to support various libcurl CURLOPT flags
and to centralize error reporting.
6. By default, suppress the vlen tests that have unfixed memory leaks; add option to enable them.
7. Make part of the nc_test/test_byterange.sh test be contingent on remotetest.unidata.ucar.edu being accessible.
Changes Left TO-DO:
1. fix provenance code, it is too HDF5 specific.
2020-06-29 08:02:47 +08:00
|
|
|
{THROW(NC_EURL);}
|
2017-03-09 08:01:10 +08:00
|
|
|
} else {
|
|
|
|
if(p[0] != '/' || p[1] != '/') /* must be proto:// */
|
This PR adds EXPERIMENTAL support for accessing data in the
cloud using a variant of the Zarr protocol and storage
format. This enhancement is generically referred to as "NCZarr".
The data model supported by NCZarr is netcdf-4 minus the user-defined
types and the String type. In this sense it is similar to the CDF-5
data model.
More detailed information about enabling and using NCZarr is
described in the document NUG/nczarr.md and in a
[Unidata Developer's blog entry](https://www.unidata.ucar.edu/blogs/developer/en/entry/overview-of-zarr-support-in).
WARNING: this code has had limited testing, so do not use this version
for production work. Also, performance improvements are ongoing.
Note especially the following platform matrix of successful tests:
Platform | Build System | S3 support
------------------------------------
Linux+gcc | Automake | yes
Linux+gcc | CMake | yes
Visual Studio | CMake | no
Additionally, and as a consequence of the addition of NCZarr,
major changes have been made to the Filter API. NOTE: NCZarr
does not yet support filters, but these changes are enablers for
that support in the future. Note that it is possible
(probable?) that there will be some accidental reversions if the
changes here did not correctly mimic the existing filter testing.
In any case, previously filter ids and parameters were of type
unsigned int. In order to support the more general zarr filter
model, this was all converted to char*. The old HDF5-specific,
unsigned int operations are still supported but they are
wrappers around the new, char* based nc_filterx_XXX functions.
This entailed at least the following changes:
1. Added the files libdispatch/dfilterx.c and include/ncfilter.h
2. Some filterx utilities have been moved to libdispatch/daux.c
3. A new entry, "filter_actions" was added to the NCDispatch table
and the version bumped.
4. An overly complex set of structs was created to support funnelling
all of the filterx operations thru a single dispatch
"filter_actions" entry.
5. Move common code from libhdf5 to libsrc4 so that it is accessible
to nczarr.
Changes directly related to Zarr:
1. Modified CMakeList.txt and configure.ac to support both C and C++
-- this is in support of S3 support via the aws-sdk libraries.
2. Define a size64_t type to support nczarr.
3. More reworking of libdispatch/dinfermodel.c to
support zarr and to regularize the structure of the fragments
section of a URL.
Changes not directly related to Zarr:
1. Make client-side filter registration be conditional, with default off.
2. Hack include/nc4internal.h to make some flags added by Ed be unique:
e.g. NC_CREAT, NC_INDEF, etc.
3. cleanup include/nchttp.h and libdispatch/dhttp.c.
4. Misc. changes to support compiling under Visual Studio including:
* Better testing under windows for dirent.h and opendir and closedir.
5. Misc. changes to the oc2 code to support various libcurl CURLOPT flags
and to centralize error reporting.
6. By default, suppress the vlen tests that have unfixed memory leaks; add option to enable them.
7. Make part of the nc_test/test_byterange.sh test be contingent on remotetest.unidata.ucar.edu being accessible.
Changes Left TO-DO:
1. fix provenance code, it is too HDF5 specific.
2020-06-29 08:02:47 +08:00
|
|
|
{THROW(NC_EURL);}
|
2017-03-09 08:01:10 +08:00
|
|
|
p += 2;
|
|
|
|
hashost = 1; /* Assume we have a hostname */
|
|
|
|
}
|
|
|
|
if(!hashost) {
|
|
|
|
tmp.path = p;
|
|
|
|
pathchar = EOFCHAR;
|
|
|
|
} else { /* assume there should be a host section */
|
|
|
|
/* We already extracted the query and/or fragment sections above,
|
|
|
|
so locate the end of the host section and therefore the start
|
|
|
|
of the path.
|
|
|
|
*/
|
|
|
|
tmp.host = p;
|
|
|
|
p = nclocate(p,"/");
|
|
|
|
if(p == NULL) { /* no path */
|
|
|
|
tmp.path = NULL; /* default */
|
|
|
|
pathchar = EOFCHAR;
|
2012-08-09 07:15:18 +08:00
|
|
|
} else {
|
2017-03-09 08:01:10 +08:00
|
|
|
tmp.path = p; /* save ptr to rest of the path */
|
|
|
|
pathchar = *p; /* save leading char of the path */
|
|
|
|
terminate(p); /* overwrite the leading char of the path; restored below */
|
2012-08-09 07:15:18 +08:00
|
|
|
}
|
|
|
|
}
|
2017-03-09 08:01:10 +08:00
|
|
|
/* Nullify tmp.host for consistency */
|
|
|
|
if(tmp.host != NULL && strlen(tmp.host)==0) {tmp.host = NULL;}
|
2014-08-02 00:47:20 +08:00
|
|
|
|
2017-03-09 08:01:10 +08:00
|
|
|
if(tmp.host != NULL) {/* Parse the host section */
|
|
|
|
char* pp;
|
2012-08-09 07:15:18 +08:00
|
|
|
/* Check for leading user:pwd@ */
|
2017-03-09 08:01:10 +08:00
|
|
|
char* newhost = strchr(tmp.host,'@');
|
|
|
|
if(newhost != NULL) {
|
|
|
|
if(newhost == tmp.host)
|
2019-09-30 02:59:28 +08:00
|
|
|
{THROW(NC_EURL);} /* we have proto://@ */
|
2017-03-09 08:01:10 +08:00
|
|
|
terminate(newhost); /* overwrite '@' */
|
|
|
|
newhost++; /* should point past usr+pwd */
|
|
|
|
tmp.user = tmp.host;
|
|
|
|
/* Break user+pwd into two pieces */
|
|
|
|
pp = strchr(tmp.user,':');
|
|
|
|
if(pp == NULL)
|
2019-09-30 02:59:28 +08:00
|
|
|
{THROW(NC_EURL);} /* we have user only */
|
2017-03-09 08:01:10 +08:00
|
|
|
terminate(pp); /* overwrite ':' */
|
|
|
|
pp++;
|
|
|
|
if(strlen(tmp.user)==0)
|
2019-09-30 02:59:28 +08:00
|
|
|
{THROW(NC_EURL);} /* we have empty user */
|
2017-03-09 08:01:10 +08:00
|
|
|
if(strlen(pp)==0)
|
2019-09-30 02:59:28 +08:00
|
|
|
{THROW(NC_EURL);} /* we have empty password */
|
2018-06-09 02:08:33 +08:00
|
|
|
tmp.password = pp;
|
2017-07-06 04:39:23 +08:00
|
|
|
tmp.host = newhost;
|
2012-08-09 07:15:18 +08:00
|
|
|
}
|
2017-03-09 08:01:10 +08:00
|
|
|
/* Breakup host into host + port */
|
|
|
|
pp = tmp.host;
|
|
|
|
pp = strchr(pp,':');
|
|
|
|
if(pp != NULL) { /* there is a port */
|
|
|
|
terminate(pp); /* overwrite ':' */
|
|
|
|
pp++; /* skip colon */
|
|
|
|
if(strlen(tmp.host) == 0)
|
2019-09-30 02:59:28 +08:00
|
|
|
{THROW(NC_EURL);} /* empty host */
|
2017-03-09 08:01:10 +08:00
|
|
|
if(strlen(pp)==0)
|
2019-09-30 02:59:28 +08:00
|
|
|
{THROW(NC_EURL);} /* empty port */
|
2017-03-09 08:01:10 +08:00
|
|
|
tmp.port = pp;
|
2012-08-09 07:15:18 +08:00
|
|
|
/* The port must look something like a number */
|
2017-03-09 08:01:10 +08:00
|
|
|
for(pp=tmp.port;*pp;pp++) {
|
|
|
|
if(strchr("0123456789-",*pp) == NULL)
|
2019-09-30 02:59:28 +08:00
|
|
|
{THROW(NC_EURL);} /* probably not a real port, fail */
|
2012-08-09 07:15:18 +08:00
|
|
|
}
|
2017-03-09 08:01:10 +08:00
|
|
|
} /* else no port */
|
2012-08-09 07:15:18 +08:00
|
|
|
}
|
|
|
|
|
2017-03-09 08:01:10 +08:00
|
|
|
/* Fill in duri from tmp */
|
|
|
|
duri = (NCURI*)calloc(1,sizeof(NCURI));
|
|
|
|
if(duri == NULL)
|
2019-09-30 02:59:28 +08:00
|
|
|
{THROW(NC_ENOMEM);}
|
2017-03-09 08:01:10 +08:00
|
|
|
/* save original uri */
|
|
|
|
duri->uri = strdup(uri0);
|
|
|
|
duri->protocol = nulldup(tmp.protocol);
|
2017-08-30 04:11:15 +08:00
|
|
|
/* before saving, we need to decode the user+pwd */
|
|
|
|
duri->user = NULL;
|
|
|
|
duri->password = NULL;
|
2018-06-09 02:08:33 +08:00
|
|
|
if(tmp.user != NULL)
|
2017-08-30 04:11:15 +08:00
|
|
|
duri->user = ncuridecode(tmp.user);
|
|
|
|
if(tmp.password != NULL)
|
|
|
|
duri->password = ncuridecode(tmp.password);
|
2017-03-09 08:01:10 +08:00
|
|
|
duri->host = nulldup(tmp.host);
|
|
|
|
duri->port = nulldup(tmp.port);
|
|
|
|
if(tmp.path != NULL) {
|
|
|
|
/* We need to add back the previously overwritten path lead char (if necessary);
|
|
|
|
this must be done after all host section related pieces have been captured */
|
|
|
|
if(pathchar != EOFCHAR)
|
|
|
|
*tmp.path = pathchar;
|
|
|
|
duri->path = nulldup(tmp.path);
|
|
|
|
}
|
This PR adds EXPERIMENTAL support for accessing data in the
cloud using a variant of the Zarr protocol and storage
format. This enhancement is generically referred to as "NCZarr".
The data model supported by NCZarr is netcdf-4 minus the user-defined
types and the String type. In this sense it is similar to the CDF-5
data model.
More detailed information about enabling and using NCZarr is
described in the document NUG/nczarr.md and in a
[Unidata Developer's blog entry](https://www.unidata.ucar.edu/blogs/developer/en/entry/overview-of-zarr-support-in).
WARNING: this code has had limited testing, so do not use this version
for production work. Also, performance improvements are ongoing.
Note especially the following platform matrix of successful tests:
Platform | Build System | S3 support
------------------------------------
Linux+gcc | Automake | yes
Linux+gcc | CMake | yes
Visual Studio | CMake | no
Additionally, and as a consequence of the addition of NCZarr,
major changes have been made to the Filter API. NOTE: NCZarr
does not yet support filters, but these changes are enablers for
that support in the future. Note that it is possible
(probable?) that there will be some accidental reversions if the
changes here did not correctly mimic the existing filter testing.
In any case, previously filter ids and parameters were of type
unsigned int. In order to support the more general zarr filter
model, this was all converted to char*. The old HDF5-specific,
unsigned int operations are still supported but they are
wrappers around the new, char* based nc_filterx_XXX functions.
This entailed at least the following changes:
1. Added the files libdispatch/dfilterx.c and include/ncfilter.h
2. Some filterx utilities have been moved to libdispatch/daux.c
3. A new entry, "filter_actions" was added to the NCDispatch table
and the version bumped.
4. An overly complex set of structs was created to support funnelling
all of the filterx operations thru a single dispatch
"filter_actions" entry.
5. Move common code from libhdf5 to libsrc4 so that it is accessible
to nczarr.
Changes directly related to Zarr:
1. Modified CMakeList.txt and configure.ac to support both C and C++
-- this is in support of S3 support via the aws-sdk libraries.
2. Define a size64_t type to support nczarr.
3. More reworking of libdispatch/dinfermodel.c to
support zarr and to regularize the structure of the fragments
section of a URL.
Changes not directly related to Zarr:
1. Make client-side filter registration be conditional, with default off.
2. Hack include/nc4internal.h to make some flags added by Ed be unique:
e.g. NC_CREAT, NC_INDEF, etc.
3. cleanup include/nchttp.h and libdispatch/dhttp.c.
4. Misc. changes to support compiling under Visual Studio including:
* Better testing under windows for dirent.h and opendir and closedir.
5. Misc. changes to the oc2 code to support various libcurl CURLOPT flags
and to centralize error reporting.
6. By default, suppress the vlen tests that have unfixed memory leaks; add option to enable them.
7. Make part of the nc_test/test_byterange.sh test be contingent on remotetest.unidata.ucar.edu being accessible.
Changes Left TO-DO:
1. fix provenance code, it is too HDF5 specific.
2020-06-29 08:02:47 +08:00
|
|
|
duri->query = NULL; /* let ensurequery fix this */
|
|
|
|
duri->fragment = NULL; /* let ensurefrag fix this */
|
2017-03-09 08:01:10 +08:00
|
|
|
duri->fraglist = tmp.fraglist; tmp.fraglist = NULL;
|
|
|
|
duri->querylist = tmp.querylist; tmp.querylist = NULL;
|
This PR adds EXPERIMENTAL support for accessing data in the
cloud using a variant of the Zarr protocol and storage
format. This enhancement is generically referred to as "NCZarr".
The data model supported by NCZarr is netcdf-4 minus the user-defined
types and the String type. In this sense it is similar to the CDF-5
data model.
More detailed information about enabling and using NCZarr is
described in the document NUG/nczarr.md and in a
[Unidata Developer's blog entry](https://www.unidata.ucar.edu/blogs/developer/en/entry/overview-of-zarr-support-in).
WARNING: this code has had limited testing, so do not use this version
for production work. Also, performance improvements are ongoing.
Note especially the following platform matrix of successful tests:
Platform | Build System | S3 support
------------------------------------
Linux+gcc | Automake | yes
Linux+gcc | CMake | yes
Visual Studio | CMake | no
Additionally, and as a consequence of the addition of NCZarr,
major changes have been made to the Filter API. NOTE: NCZarr
does not yet support filters, but these changes are enablers for
that support in the future. Note that it is possible
(probable?) that there will be some accidental reversions if the
changes here did not correctly mimic the existing filter testing.
In any case, previously filter ids and parameters were of type
unsigned int. In order to support the more general zarr filter
model, this was all converted to char*. The old HDF5-specific,
unsigned int operations are still supported but they are
wrappers around the new, char* based nc_filterx_XXX functions.
This entailed at least the following changes:
1. Added the files libdispatch/dfilterx.c and include/ncfilter.h
2. Some filterx utilities have been moved to libdispatch/daux.c
3. A new entry, "filter_actions" was added to the NCDispatch table
and the version bumped.
4. An overly complex set of structs was created to support funnelling
all of the filterx operations thru a single dispatch
"filter_actions" entry.
5. Move common code from libhdf5 to libsrc4 so that it is accessible
to nczarr.
Changes directly related to Zarr:
1. Modified CMakeList.txt and configure.ac to support both C and C++
-- this is in support of S3 support via the aws-sdk libraries.
2. Define a size64_t type to support nczarr.
3. More reworking of libdispatch/dinfermodel.c to
support zarr and to regularize the structure of the fragments
section of a URL.
Changes not directly related to Zarr:
1. Make client-side filter registration be conditional, with default off.
2. Hack include/nc4internal.h to make some flags added by Ed be unique:
e.g. NC_CREAT, NC_INDEF, etc.
3. cleanup include/nchttp.h and libdispatch/dhttp.c.
4. Misc. changes to support compiling under Visual Studio including:
* Better testing under windows for dirent.h and opendir and closedir.
5. Misc. changes to the oc2 code to support various libcurl CURLOPT flags
and to centralize error reporting.
6. By default, suppress the vlen tests that have unfixed memory leaks; add option to enable them.
7. Make part of the nc_test/test_byterange.sh test be contingent on remotetest.unidata.ucar.edu being accessible.
Changes Left TO-DO:
1. fix provenance code, it is too HDF5 specific.
2020-06-29 08:02:47 +08:00
|
|
|
|
|
|
|
/* make sure query and fragment strings are defined */
|
|
|
|
ensurequerylist(duri);
|
|
|
|
ensurefraglist(duri);
|
|
|
|
|
2018-06-09 02:08:33 +08:00
|
|
|
if(durip)
|
|
|
|
*durip = duri;
|
|
|
|
else
|
|
|
|
free(duri);
|
2012-08-09 07:15:18 +08:00
|
|
|
|
|
|
|
#ifdef NCXDEBUG
|
|
|
|
{
|
2012-08-20 05:54:30 +08:00
|
|
|
fprintf(stderr,"duri:");
|
2017-03-09 08:01:10 +08:00
|
|
|
fprintf(stderr," protocol=|%s|",FIX(duri->protocol));
|
|
|
|
fprintf(stderr," user=|%s|",FIX(duri->user));
|
|
|
|
fprintf(stderr," password=|%s|",FIX(duri->password));
|
|
|
|
fprintf(stderr," host=|%s|",FIX(duri->host));
|
|
|
|
fprintf(stderr," port=|%s|",FIX(duri->port));
|
|
|
|
fprintf(stderr," path=|%s|",FIX(duri->path));
|
|
|
|
fprintf(stderr," query=|%s|",FIX(duri->query));
|
|
|
|
fprintf(stderr," fragment=|%s|",FIX(duri->fragment));
|
2012-08-09 07:15:18 +08:00
|
|
|
fprintf(stderr,"\n");
|
|
|
|
}
|
|
|
|
#endif
|
2012-12-14 06:09:41 +08:00
|
|
|
|
2017-03-09 08:01:10 +08:00
|
|
|
done:
|
|
|
|
if(uri != NULL)
|
2018-06-09 02:08:33 +08:00
|
|
|
free(uri);
|
|
|
|
|
2017-03-09 08:01:10 +08:00
|
|
|
freestringlist(params);
|
|
|
|
freestringlist(querylist);
|
2019-10-25 06:37:52 +08:00
|
|
|
if(tmp.fraglist)
|
|
|
|
freestringvec(tmp.fraglist);
|
|
|
|
if(tmp.querylist)
|
|
|
|
freestringvec(tmp.querylist);
|
|
|
|
|
2017-03-09 08:01:10 +08:00
|
|
|
return ret;
|
|
|
|
}
|
2012-08-09 07:15:18 +08:00
|
|
|
|
2017-03-09 08:01:10 +08:00
|
|
|
static void
|
|
|
|
freestringlist(NClist* list)
|
|
|
|
{
|
|
|
|
if(list != NULL) {
|
|
|
|
int i;
|
|
|
|
for(i=0;i<nclistlength(list);i++) {
|
|
|
|
void* p = nclistget(list,i);
|
|
|
|
nullfree(p);
|
|
|
|
}
|
|
|
|
nclistfree(list);
|
2012-08-09 07:15:18 +08:00
|
|
|
}
|
2017-03-09 08:01:10 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Release a NULL-terminated vector of strings: each entry up to (but not
   including) the first NULL entry is released, then the vector itself.
   A NULL vector is silently ignored. */
static void
freestringvec(char** list)
{
    char** cursor;

    if(list == NULL) return;
    cursor = list;
    while(*cursor) {
        nullfree(*cursor);
        cursor++;
    }
    nullfree(list);
}
|
|
|
|
|
|
|
|
void
|
2012-08-20 05:54:30 +08:00
|
|
|
ncurifree(NCURI* duri)
|
2012-08-09 07:15:18 +08:00
|
|
|
{
|
2012-08-20 05:54:30 +08:00
|
|
|
if(duri == NULL) return;
|
2017-03-09 08:01:10 +08:00
|
|
|
nullfree(duri->uri);
|
|
|
|
nullfree(duri->protocol);
|
|
|
|
nullfree(duri->user);
|
|
|
|
nullfree(duri->password);
|
|
|
|
nullfree(duri->host);
|
|
|
|
nullfree(duri->port);
|
|
|
|
nullfree(duri->path);
|
|
|
|
nullfree(duri->query);
|
|
|
|
nullfree(duri->fragment);
|
|
|
|
freestringvec(duri->querylist);
|
|
|
|
freestringvec(duri->fraglist);
|
2012-08-20 05:54:30 +08:00
|
|
|
free(duri);
|
2012-08-09 07:15:18 +08:00
|
|
|
}
|
|
|
|
|
2017-03-09 08:01:10 +08:00
|
|
|
/* Replace the protocol */
|
|
|
|
int
|
|
|
|
ncurisetprotocol(NCURI* duri,const char* protocol)
|
|
|
|
{
|
|
|
|
nullfree(duri->protocol);
|
|
|
|
duri->protocol = strdup(protocol);
|
2019-09-30 02:59:28 +08:00
|
|
|
return (NC_NOERR);
|
Codify cross-platform file paths
The netcdf-c code has to deal with a variety of platforms:
Windows, OSX, Linux, Cygwin, MSYS, etc. These platforms differ
significantly in the kind of file paths that they accept. So in
order to handle this, I have created a set of replacements for
the most common file system operations such as _open_ or _fopen_
or _access_ to manage the file path differences correctly.
A more limited version of this idea was already implemented via
the ncwinpath.h and dwinpath.c code. So this can be viewed as a
replacement for that code. And in path in many cases, the only
change that was required was to replace '#include <ncwinpath.h>'
with '#include <ncpathmgt.h>' and then replace file operation
calls with the NCxxx equivalent from ncpathmgr.h Note that
recently, the ncwinpath.h was renamed ncpathmgmt.h, so this pull
request should not require dealing with winpath.
The heart of the change is include/ncpathmgmt.h, which provides
alternate operations such as NCfopen or NCaccess and which properly
parse and rebuild path arguments to work for the platform on which
the code is executing. This mostly matters for Windows because of the
way that it uses backslash and drive letters, as compared to *nix*.
One important feature is that the user can do string manipulations
on a file path without having to worry too much about the platform
because the path management code will properly handle most mixed cases.
So one can for example concatenate a path suffix that uses forward
slashes to a Windows path and have it work correctly.
The conversion code is in libdispatch/dpathmgr.c, and the
important function there is NCpathcvt which does the proper
conversions to the local path format.
As a rule, most code should just replace their file operations with
the corresponding NCxxx ones defined in include/ncpathmgmt.h. These
NCxxx functions all call NCpathcvt on their path arguments before
executing the actual file operation.
In some rare cases, the client may need to directly use NCpathcvt,
but this should be avoided as much as possible. If there is a need
for supporting a new file operation not already in ncpathmgmt.h, then
use the code in dpathmgr.c as a template. Also please notify Unidata
so we can include it as a formal part or our supported operations.
Also, if you see an operation in the library that is not using the
NCxxx form, then please submit an issue so we can fix it.
Misc. Changes:
* Clean up the utf8 testing code; it is impossible to get some
tests to work under windows using shell scripts; the args do
not pass as utf8 but as some other encoding.
* Added an extra utf8 test case: test_unicode_path.sh
* Add a true test for HDF5 1.10.6 or later because as noted in
PR https://github.com/Unidata/netcdf-c/pull/1794,
HDF5 changed its Windows file path handling.
2021-03-05 04:41:31 +08:00
|
|
|
}
|
|
|
|
|
2021-09-28 08:36:33 +08:00
|
|
|
/* Replace the host */
|
|
|
|
int
|
|
|
|
ncurisethost(NCURI* duri,const char* host)
|
|
|
|
{
|
|
|
|
nullfree(duri->host);
|
|
|
|
duri->host = strdup(host);
|
|
|
|
return (NC_NOERR);
|
|
|
|
}
|
|
|
|
|
Codify cross-platform file paths
The netcdf-c code has to deal with a variety of platforms:
Windows, OSX, Linux, Cygwin, MSYS, etc. These platforms differ
significantly in the kind of file paths that they accept. So in
order to handle this, I have created a set of replacements for
the most common file system operations such as _open_ or _fopen_
or _access_ to manage the file path differences correctly.
A more limited version of this idea was already implemented via
the ncwinpath.h and dwinpath.c code. So this can be viewed as a
replacement for that code. And in many cases, the only
change that was required was to replace '#include <ncwinpath.h>'
with '#include <ncpathmgt.h>' and then replace file operation
calls with the NCxxx equivalent from ncpathmgr.h. Note that
recently, the ncwinpath.h was renamed ncpathmgmt.h, so this pull
request should not require dealing with winpath.
The heart of the change is include/ncpathmgmt.h, which provides
alternate operations such as NCfopen or NCaccess and which properly
parse and rebuild path arguments to work for the platform on which
the code is executing. This mostly matters for Windows because of the
way that it uses backslash and drive letters, as compared to *nix*.
One important feature is that the user can do string manipulations
on a file path without having to worry too much about the platform
because the path management code will properly handle most mixed cases.
So one can for example concatenate a path suffix that uses forward
slashes to a Windows path and have it work correctly.
The conversion code is in libdispatch/dpathmgr.c, and the
important function there is NCpathcvt which does the proper
conversions to the local path format.
As a rule, most code should just replace their file operations with
the corresponding NCxxx ones defined in include/ncpathmgmt.h. These
NCxxx functions all call NCpathcvt on their path arguments before
executing the actual file operation.
In some rare cases, the client may need to directly use NCpathcvt,
but this should be avoided as much as possible. If there is a need
for supporting a new file operation not already in ncpathmgmt.h, then
use the code in dpathmgr.c as a template. Also please notify Unidata
so we can include it as a formal part of our supported operations.
Also, if you see an operation in the library that is not using the
NCxxx form, then please submit an issue so we can fix it.
Misc. Changes:
* Clean up the utf8 testing code; it is impossible to get some
tests to work under windows using shell scripts; the args do
not pass as utf8 but as some other encoding.
* Added an extra utf8 test case: test_unicode_path.sh
* Add a true test for HDF5 1.10.6 or later because as noted in
PR https://github.com/Unidata/netcdf-c/pull/1794,
HDF5 changed its Windows file path handling.
2021-03-05 04:41:31 +08:00
|
|
|
/* Replace the path */
|
|
|
|
int
|
|
|
|
ncurisetpath(NCURI* duri,const char* newpath)
|
|
|
|
{
|
|
|
|
nullfree(duri->path);
|
|
|
|
duri->path = strdup(newpath);
|
|
|
|
return (NC_NOERR);
|
2017-03-09 08:01:10 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Replace the query */
|
|
|
|
int
|
|
|
|
ncurisetquery(NCURI* duri,const char* query)
|
|
|
|
{
|
2019-09-30 02:59:28 +08:00
|
|
|
int ret = NC_NOERR;
|
2018-06-09 02:08:33 +08:00
|
|
|
freestringvec(duri->querylist);
|
2017-03-09 08:01:10 +08:00
|
|
|
nullfree(duri->query);
|
|
|
|
duri->query = NULL;
|
|
|
|
duri->querylist = NULL;
|
|
|
|
if(query != NULL && strlen(query) > 0) {
|
|
|
|
NClist* params = nclistnew();
|
|
|
|
duri->query = strdup(query);
|
|
|
|
ret = parselist(duri->query,params);
|
2019-09-30 02:59:28 +08:00
|
|
|
if(ret != NC_NOERR)
|
2017-03-09 08:01:10 +08:00
|
|
|
{THROW(NC_EURL);}
|
|
|
|
nclistpush(params,NULL);
|
|
|
|
duri->querylist = nclistextract(params);
|
|
|
|
nclistfree(params);
|
|
|
|
}
|
|
|
|
done:
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2019-09-30 02:59:28 +08:00
|
|
|
/* Replace the fragments*/
|
|
|
|
int
|
|
|
|
ncurisetfragments(NCURI* duri,const char* fragments)
|
|
|
|
{
|
|
|
|
int ret = NC_NOERR;
|
|
|
|
freestringvec(duri->fraglist);
|
|
|
|
nullfree(duri->fragment);
|
|
|
|
duri->fragment = NULL;
|
|
|
|
duri->fraglist = NULL;
|
|
|
|
if(fragments != NULL && strlen(fragments) > 0) {
|
|
|
|
duri->fragment = strdup(fragments);
|
|
|
|
}
|
This PR adds EXPERIMENTAL support for accessing data in the
cloud using a variant of the Zarr protocol and storage
format. This enhancement is generically referred to as "NCZarr".
The data model supported by NCZarr is netcdf-4 minus the user-defined
types and the String type. In this sense it is similar to the CDF-5
data model.
More detailed information about enabling and using NCZarr is
described in the document NUG/nczarr.md and in a
[Unidata Developer's blog entry](https://www.unidata.ucar.edu/blogs/developer/en/entry/overview-of-zarr-support-in).
WARNING: this code has had limited testing, so do use this version
for production work. Also, performance improvements are ongoing.
Note especially the following platform matrix of successful tests:
Platform | Build System | S3 support
------------------------------------
Linux+gcc | Automake | yes
Linux+gcc | CMake | yes
Visual Studio | CMake | no
Additionally, and as a consequence of the addition of NCZarr,
major changes have been made to the Filter API. NOTE: NCZarr
does not yet support filters, but these changes are enablers for
that support in the future. Note that it is possible
(probable?) that there will be some accidental reversions if the
changes here did not correctly mimic the existing filter testing.
In any case, previously filter ids and parameters were of type
unsigned int. In order to support the more general zarr filter
model, this was all converted to char*. The old HDF5-specific,
unsigned int operations are still supported but they are
wrappers around the new, char* based nc_filterx_XXX functions.
This entailed at least the following changes:
1. Added the files libdispatch/dfilterx.c and include/ncfilter.h
2. Some filterx utilities have been moved to libdispatch/daux.c
3. A new entry, "filter_actions" was added to the NCDispatch table
and the version bumped.
4. An overly complex set of structs was created to support funnelling
all of the filterx operations thru a single dispatch
"filter_actions" entry.
5. Move common code to from libhdf5 to libsrc4 so that it is accessible
to nczarr.
Changes directly related to Zarr:
1. Modified CMakeList.txt and configure.ac to support both C and C++
-- this is in support of S3 support via the awd-sdk libraries.
2. Define a size64_t type to support nczarr.
3. More reworking of libdispatch/dinfermodel.c to
support zarr and to regularize the structure of the fragments
section of a URL.
Changes not directly related to Zarr:
1. Make client-side filter registration be conditional, with default off.
2. Hack include/nc4internal.h to make some flags added by Ed be unique:
e.g. NC_CREAT, NC_INDEF, etc.
3. cleanup include/nchttp.h and libdispatch/dhttp.c.
4. Misc. changes to support compiling under Visual Studio including:
* Better testing under windows for dirent.h and opendir and closedir.
5. Misc. changes to the oc2 code to support various libcurl CURLOPT flags
and to centralize error reporting.
6. By default, suppress the vlen tests that have unfixed memory leaks; add option to enable them.
7. Make part of the nc_test/test_byterange.sh test be contingent on remotetest.unidata.ucar.edu being accessible.
Changes Left TO-DO:
1. fix provenance code, it is too HDF5 specific.
2020-06-29 08:02:47 +08:00
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Replace a specific fragment key*/
|
|
|
|
int
|
|
|
|
ncurisetfragmentkey(NCURI* duri,const char* key, const char* value)
|
|
|
|
{
|
|
|
|
int ret = NC_NOERR;
|
|
|
|
int pos = -1;
|
|
|
|
char* newlist = NULL;
|
|
|
|
|
|
|
|
ensurefraglist(duri);
|
|
|
|
pos = ncfind(duri->fraglist, key);
|
|
|
|
if(pos < 0) return NC_EINVAL; /* does not exist */
|
|
|
|
nullfree(duri->fraglist[pos+1]);
|
|
|
|
duri->fraglist[pos+1] = strdup(value);
|
|
|
|
/* Rebuild the fragment */
|
|
|
|
if((ret = unparselist((const char**)duri->fraglist,"#",0,&newlist))) goto done;
|
|
|
|
nullfree(duri->fragment);
|
|
|
|
duri->fragment = newlist; newlist = NULL;
|
|
|
|
done:
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Replace or add a specific fragment key*/
|
|
|
|
int
|
|
|
|
ncuriappendfragmentkey(NCURI* duri,const char* key, const char* value)
|
|
|
|
{
|
|
|
|
int ret = NC_NOERR;
|
|
|
|
int len;
|
|
|
|
int pos = -1;
|
|
|
|
char* newlist = NULL;
|
|
|
|
|
|
|
|
ensurefraglist(duri);
|
|
|
|
pos = ncfind(duri->fraglist, key);
|
|
|
|
if(pos < 0) { /* does not exist */
|
|
|
|
if((ret = extendenvv(&duri->fraglist,2,&len))) goto done;
|
|
|
|
duri->fraglist[len] = strdup(key);
|
|
|
|
duri->fraglist[len+1] = nulldup(value);
|
|
|
|
duri->fraglist[len+2] = NULL;
|
|
|
|
} else {
|
|
|
|
nullfree(duri->fraglist[pos+1]);
|
|
|
|
duri->fraglist[pos+1] = strdup(value);
|
|
|
|
}
|
|
|
|
/* Rebuild the fragment */
|
|
|
|
if((ret = unparselist((const char**)duri->fraglist,"#",0,&newlist))) goto done;
|
|
|
|
nullfree(duri->fragment);
|
|
|
|
duri->fragment = newlist; newlist = NULL;
|
2019-09-30 02:59:28 +08:00
|
|
|
done:
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2017-03-09 08:01:10 +08:00
|
|
|
#if 0
|
2012-08-09 07:15:18 +08:00
|
|
|
/* Replace the constraints */
|
2017-03-09 08:01:10 +08:00
|
|
|
int
|
2012-08-09 07:15:18 +08:00
|
|
|
ncurisetconstraints(NCURI* duri,const char* constraints)
|
|
|
|
{
|
|
|
|
char* proj = NULL;
|
|
|
|
char* select = NULL;
|
|
|
|
const char* p;
|
|
|
|
|
|
|
|
if(duri->constraint != NULL) free(duri->constraint);
|
|
|
|
if(duri->projection != NULL) free(duri->projection);
|
|
|
|
if(duri->selection != NULL) free(duri->selection);
|
2014-08-02 00:47:20 +08:00
|
|
|
duri->constraint = NULL;
|
|
|
|
duri->projection = NULL;
|
2012-08-09 07:15:18 +08:00
|
|
|
duri->selection = NULL;
|
|
|
|
|
2019-09-30 02:59:28 +08:00
|
|
|
if(constraints == NULL || strlen(constraints)==0) return (NC_ECONSTRAINTS);
|
2012-08-09 07:15:18 +08:00
|
|
|
|
|
|
|
duri->constraint = nulldup(constraints);
|
|
|
|
if(*duri->constraint == '?')
|
|
|
|
nclshift1(duri->constraint);
|
|
|
|
|
|
|
|
p = duri->constraint;
|
|
|
|
proj = (char*) p;
|
|
|
|
select = strchr(proj,'&');
|
|
|
|
if(select != NULL) {
|
2014-03-11 02:09:36 +08:00
|
|
|
size_t plen = (size_t)(select - proj);
|
2012-08-09 07:15:18 +08:00
|
|
|
if(plen == 0) {
|
|
|
|
proj = NULL;
|
|
|
|
} else {
|
|
|
|
proj = (char*)malloc(plen+1);
|
|
|
|
memcpy((void*)proj,p,plen);
|
|
|
|
proj[plen] = EOFCHAR;
|
|
|
|
}
|
|
|
|
select = nulldup(select);
|
|
|
|
} else {
|
|
|
|
proj = nulldup(proj);
|
|
|
|
select = NULL;
|
|
|
|
}
|
|
|
|
duri->projection = proj;
|
|
|
|
duri->selection = select;
|
2019-09-30 02:59:28 +08:00
|
|
|
return NC_NOERR;
|
2012-08-09 07:15:18 +08:00
|
|
|
}
|
2017-03-09 08:01:10 +08:00
|
|
|
#endif
|
2012-08-09 07:15:18 +08:00
|
|
|
|
|
|
|
/* Construct a complete NC URI.
|
|
|
|
Optionally with the constraints.
|
|
|
|
Optionally with the user parameters.
|
|
|
|
Caller frees returned string.
|
|
|
|
Optionally encode the pieces.
|
|
|
|
*/
|
|
|
|
|
|
|
|
char*
|
|
|
|
ncuribuild(NCURI* duri, const char* prefix, const char* suffix, int flags)
|
|
|
|
{
|
2017-03-09 08:01:10 +08:00
|
|
|
char* newuri = NULL;
|
|
|
|
NCbytes* buf = ncbytesnew();
|
Yet another fix for DAP2 double URL encoding.
re: https://github.com/Unidata/netcdf-c/issues/1876
and: https://github.com/Unidata/netcdf-c/pull/1835
and: https://github.com/Unidata/netcdf4-python/issues/1041
The change in PR 1835 was correct with respect to using %20 instead of '+'
for encoding blanks. However, it was a mistake to assume everything was
unencoded and then to do encoding ourselves. The problem is that
different servers do different things, with Columbia being an outlier.
So, I have added a set of client controls that can at least give
the caller some control over this. The caller can append
the following fragment to his URL to control what gets encoded before
sending it to the server. The syntax is as follows:
````
https://<host>/<path>/<query>#encode=path|query|all|none
````
The possible values:
* path -- URL encode (i.e. %xx encode) as needed in the path part of the URL.
* query -- URL encode as needed in the query part of the URL.
* all -- equivalent to ````#encode=path,query````.
* none -- do not url encode any part of the URL sent to the server; not strictly necessary, so mostly for completeness.
Note that if "encode=" is used, then before it is processed, all encoding
is turned off so that ````#encode=path```` will only encode the path
and not the query.
The default is ````#encode=query````, so the path is left untouched,
but the query is always encoded.
Internally, this required changes to pass the encode flags down into
the OC2 library.
Misc. Unrelated Changes:
* Shut up those irritating warning from putget.m4
2020-11-06 02:04:56 +08:00
|
|
|
const int encodepath = (flags&NCURIENCODEPATH ? 1 : 0);
|
|
|
|
const int encodequery = (flags&NCURIENCODEQUERY ? 1 : 0);
|
2012-08-09 07:15:18 +08:00
|
|
|
|
2017-03-09 08:01:10 +08:00
|
|
|
if(prefix != NULL)
|
|
|
|
ncbytescat(buf,prefix);
|
2012-08-09 07:15:18 +08:00
|
|
|
|
2017-03-09 08:01:10 +08:00
|
|
|
ncbytescat(buf,duri->protocol);
|
|
|
|
ncbytescat(buf,"://"); /* this will produce file:///... */
|
2012-08-09 07:15:18 +08:00
|
|
|
|
2017-03-09 08:01:10 +08:00
|
|
|
if((flags & NCURIPWD) && duri->user != NULL && duri->password != NULL) {
|
2017-08-30 04:11:15 +08:00
|
|
|
/* The user and password must be encoded */
|
|
|
|
char* encoded = ncuriencodeonly(duri->user,userpwdallow);
|
|
|
|
ncbytescat(buf,encoded);
|
|
|
|
nullfree(encoded);
|
2017-03-09 08:01:10 +08:00
|
|
|
ncbytescat(buf,":");
|
2017-08-30 04:11:15 +08:00
|
|
|
encoded = ncuriencodeonly(duri->password,userpwdallow);
|
|
|
|
ncbytescat(buf,encoded);
|
|
|
|
nullfree(encoded);
|
2017-03-09 08:01:10 +08:00
|
|
|
ncbytescat(buf,"@");
|
2012-08-09 07:15:18 +08:00
|
|
|
}
|
2017-03-09 08:01:10 +08:00
|
|
|
if(duri->host != NULL) ncbytescat(buf,duri->host);
|
2012-08-09 07:15:18 +08:00
|
|
|
if(duri->port != NULL) {
|
2017-03-09 08:01:10 +08:00
|
|
|
ncbytescat(buf,":");
|
|
|
|
ncbytescat(buf,duri->port);
|
|
|
|
}
|
|
|
|
if((flags & NCURIPATH)) {
|
|
|
|
if(duri->path == NULL)
|
|
|
|
ncbytescat(buf,"/");
|
Yet another fix for DAP2 double URL encoding.
re: https://github.com/Unidata/netcdf-c/issues/1876
and: https://github.com/Unidata/netcdf-c/pull/1835
and: https://github.com/Unidata/netcdf4-python/issues/1041
The change in PR 1835 was correct with respect to using %20 instead of '+'
for encoding blanks. However, it was a mistake to assume everything was
unencoded and then to do encoding ourselves. The problem is that
different servers do different things, with Columbia being an outlier.
So, I have added a set of client controls that can at least give
the caller some control over this. The caller can append
the following fragment to his URL to control what gets encoded before
sending it to the server. The syntax is as follows:
````
https://<host>/<path>/<query>#encode=path|query|all|none
````
The possible values:
* path -- URL encode (i.e. %xx encode) as needed in the path part of the URL.
* query -- URL encode as needed in the query part of the URL.
* all -- equivalent to ````#encode=path,query````.
* none -- do not url encode any part of the URL sent to the server; not strictly necessary, so mostly for completeness.
Note that if "encode=" is used, then before it is processed, all encoding
is turned of so that ````#encode=path```` will only encode the path
and not the query.
The default is ````#encode=query````, so the path is left untouched,
but the query is always encoded.
Internally, this required changes to pass the encode flags down into
the OC2 library.
Misc. Unrelated Changes:
* Shut up those irritating warning from putget.m4
2020-11-06 02:04:56 +08:00
|
|
|
else if(encodepath) {
|
2017-08-30 04:11:15 +08:00
|
|
|
char* encoded = ncuriencodeonly(duri->path,pathallow);
|
2017-03-09 08:01:10 +08:00
|
|
|
ncbytescat(buf,encoded);
|
|
|
|
nullfree(encoded);
|
2018-06-09 02:08:33 +08:00
|
|
|
} else
|
2017-03-09 08:01:10 +08:00
|
|
|
ncbytescat(buf,duri->path);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* The suffix is intended to some kind of path extension (e.g. .dds)
|
|
|
|
so insert here
|
|
|
|
*/
|
|
|
|
if(suffix != NULL)
|
|
|
|
ncbytescat(buf,suffix);
|
2012-08-09 07:15:18 +08:00
|
|
|
|
2019-07-15 05:56:29 +08:00
|
|
|
/* The query and the querylist are assumed to be unencoded */
|
This PR adds EXPERIMENTAL support for accessing data in the
cloud using a variant of the Zarr protocol and storage
format. This enhancement is generically referred to as "NCZarr".
The data model supported by NCZarr is netcdf-4 minus the user-defined
types and the String type. In this sense it is similar to the CDF-5
data model.
More detailed information about enabling and using NCZarr is
described in the document NUG/nczarr.md and in a
[Unidata Developer's blog entry](https://www.unidata.ucar.edu/blogs/developer/en/entry/overview-of-zarr-support-in).
WARNING: this code has had limited testing, so do use this version
for production work. Also, performance improvements are ongoing.
Note especially the following platform matrix of successful tests:
Platform | Build System | S3 support
------------------------------------
Linux+gcc | Automake | yes
Linux+gcc | CMake | yes
Visual Studio | CMake | no
Additionally, and as a consequence of the addition of NCZarr,
major changes have been made to the Filter API. NOTE: NCZarr
does not yet support filters, but these changes are enablers for
that support in the future. Note that it is possible
(probable?) that there will be some accidental reversions if the
changes here did not correctly mimic the existing filter testing.
In any case, previously filter ids and parameters were of type
unsigned int. In order to support the more general zarr filter
model, this was all converted to char*. The old HDF5-specific,
unsigned int operations are still supported but they are
wrappers around the new, char* based nc_filterx_XXX functions.
This entailed at least the following changes:
1. Added the files libdispatch/dfilterx.c and include/ncfilter.h
2. Some filterx utilities have been moved to libdispatch/daux.c
3. A new entry, "filter_actions" was added to the NCDispatch table
and the version bumped.
4. An overly complex set of structs was created to support funnelling
all of the filterx operations thru a single dispatch
"filter_actions" entry.
5. Move common code to from libhdf5 to libsrc4 so that it is accessible
to nczarr.
Changes directly related to Zarr:
1. Modified CMakeList.txt and configure.ac to support both C and C++
-- this is in support of S3 support via the awd-sdk libraries.
2. Define a size64_t type to support nczarr.
3. More reworking of libdispatch/dinfermodel.c to
support zarr and to regularize the structure of the fragments
section of a URL.
Changes not directly related to Zarr:
1. Make client-side filter registration be conditional, with default off.
2. Hack include/nc4internal.h to make some flags added by Ed be unique:
e.g. NC_CREAT, NC_INDEF, etc.
3. cleanup include/nchttp.h and libdispatch/dhttp.c.
4. Misc. changes to support compiling under Visual Studio including:
* Better testing under windows for dirent.h and opendir and closedir.
5. Misc. changes to the oc2 code to support various libcurl CURLOPT flags
and to centralize error reporting.
6. By default, suppress the vlen tests that have unfixed memory leaks; add option to enable them.
7. Make part of the nc_test/test_byterange.sh test be contingent on remotetest.unidata.ucar.edu being accessible.
Changes Left TO-DO:
1. fix provenance code, it is too HDF5 specific.
2020-06-29 08:02:47 +08:00
|
|
|
if(flags & NCURIQUERY) {
|
|
|
|
ensurequerylist(duri);
|
|
|
|
if(duri->query != NULL) {
|
|
|
|
ncbytescat(buf,"?");
|
Yet another fix for DAP2 double URL encoding.
re: https://github.com/Unidata/netcdf-c/issues/1876
and: https://github.com/Unidata/netcdf-c/pull/1835
and: https://github.com/Unidata/netcdf4-python/issues/1041
The change in PR 1835 was correct with respect to using %20 instead of '+'
for encoding blanks. However, it was a mistake to assume everything was
unencoded and then to do encoding ourselves. The problem is that
different servers do different things, with Columbia being an outlier.
So, I have added a set of client controls that can at least give
the caller some control over this. The caller can append
the following fragment to his URL to control what gets encoded before
sending it to the server. The syntax is as follows:
````
https://<host>/<path>/<query>#encode=path|query|all|none
````
The possible values:
* path -- URL encode (i.e. %xx encode) as needed in the path part of the URL.
* query -- URL encode as needed in the query part of the URL.
* all -- equivalent to ````#encode=path,query````.
* none -- do not url encode any part of the URL sent to the server; not strictly necessary, so mostly for completeness.
Note that if "encode=" is used, then before it is processed, all encoding
is turned of so that ````#encode=path```` will only encode the path
and not the query.
The default is ````#encode=query````, so the path is left untouched,
but the query is always encoded.
Internally, this required changes to pass the encode flags down into
the OC2 library.
Misc. Unrelated Changes:
* Shut up those irritating warning from putget.m4
2020-11-06 02:04:56 +08:00
|
|
|
if(encodequery) {
|
|
|
|
char* encoded = ncuriencodeonly(duri->query,queryallow);
|
|
|
|
ncbytescat(buf,encoded);
|
|
|
|
nullfree(encoded);
|
|
|
|
} else
|
|
|
|
ncbytescat(buf,duri->query);
|
|
|
|
}
|
2012-08-09 07:15:18 +08:00
|
|
|
}
|
This PR adds EXPERIMENTAL support for accessing data in the
cloud using a variant of the Zarr protocol and storage
format. This enhancement is generically referred to as "NCZarr".
The data model supported by NCZarr is netcdf-4 minus the user-defined
types and the String type. In this sense it is similar to the CDF-5
data model.
More detailed information about enabling and using NCZarr is
described in the document NUG/nczarr.md and in a
[Unidata Developer's blog entry](https://www.unidata.ucar.edu/blogs/developer/en/entry/overview-of-zarr-support-in).
WARNING: this code has had limited testing, so do use this version
for production work. Also, performance improvements are ongoing.
Note especially the following platform matrix of successful tests:
Platform | Build System | S3 support
------------------------------------
Linux+gcc | Automake | yes
Linux+gcc | CMake | yes
Visual Studio | CMake | no
Additionally, and as a consequence of the addition of NCZarr,
major changes have been made to the Filter API. NOTE: NCZarr
does not yet support filters, but these changes are enablers for
that support in the future. Note that it is possible
(probable?) that there will be some accidental reversions if the
changes here did not correctly mimic the existing filter testing.
In any case, previously filter ids and parameters were of type
unsigned int. In order to support the more general zarr filter
model, this was all converted to char*. The old HDF5-specific,
unsigned int operations are still supported but they are
wrappers around the new, char* based nc_filterx_XXX functions.
This entailed at least the following changes:
1. Added the files libdispatch/dfilterx.c and include/ncfilter.h
2. Some filterx utilities have been moved to libdispatch/daux.c
3. A new entry, "filter_actions" was added to the NCDispatch table
and the version bumped.
4. An overly complex set of structs was created to support funnelling
all of the filterx operations thru a single dispatch
"filter_actions" entry.
5. Move common code to from libhdf5 to libsrc4 so that it is accessible
to nczarr.
Changes directly related to Zarr:
1. Modified CMakeList.txt and configure.ac to support both C and C++
-- this is in support of S3 support via the awd-sdk libraries.
2. Define a size64_t type to support nczarr.
3. More reworking of libdispatch/dinfermodel.c to
support zarr and to regularize the structure of the fragments
section of a URL.
Changes not directly related to Zarr:
1. Make client-side filter registration be conditional, with default off.
2. Hack include/nc4internal.h to make some flags added by Ed be unique:
e.g. NC_CREAT, NC_INDEF, etc.
3. cleanup include/nchttp.h and libdispatch/dhttp.c.
4. Misc. changes to support compiling under Visual Studio including:
* Better testing under windows for dirent.h and opendir and closedir.
5. Misc. changes to the oc2 code to support various libcurl CURLOPT flags
and to centralize error reporting.
6. By default, suppress the vlen tests that have unfixed memory leaks; add option to enable them.
7. Make part of the nc_test/test_byterange.sh test be contingent on remotetest.unidata.ucar.edu being accessible.
Changes Left TO-DO:
1. fix provenance code, it is too HDF5 specific.
2020-06-29 08:02:47 +08:00
|
|
|
if(flags & NCURIFRAG) {
|
|
|
|
ensurefraglist(duri);
|
|
|
|
if(duri->fragment != NULL) {
|
|
|
|
ncbytescat(buf,"#");
|
|
|
|
ncbytescat(buf,duri->fragment);
|
2018-06-09 02:08:33 +08:00
|
|
|
}
|
2012-08-09 07:15:18 +08:00
|
|
|
}
|
2017-03-09 08:01:10 +08:00
|
|
|
ncbytesnull(buf);
|
|
|
|
newuri = ncbytesextract(buf);
|
|
|
|
ncbytesfree(buf);
|
2012-08-09 07:15:18 +08:00
|
|
|
return newuri;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2017-03-09 08:01:10 +08:00
|
|
|
const char*
|
This PR adds EXPERIMENTAL support for accessing data in the
cloud using a variant of the Zarr protocol and storage
format. This enhancement is generically referred to as "NCZarr".
The data model supported by NCZarr is netcdf-4 minus the user-defined
types and the String type. In this sense it is similar to the CDF-5
data model.
More detailed information about enabling and using NCZarr is
described in the document NUG/nczarr.md and in a
[Unidata Developer's blog entry](https://www.unidata.ucar.edu/blogs/developer/en/entry/overview-of-zarr-support-in).
WARNING: this code has had limited testing, so do use this version
for production work. Also, performance improvements are ongoing.
Note especially the following platform matrix of successful tests:
Platform | Build System | S3 support
------------------------------------
Linux+gcc | Automake | yes
Linux+gcc | CMake | yes
Visual Studio | CMake | no
Additionally, and as a consequence of the addition of NCZarr,
major changes have been made to the Filter API. NOTE: NCZarr
does not yet support filters, but these changes are enablers for
that support in the future. Note that it is possible
(probable?) that there will be some accidental reversions if the
changes here did not correctly mimic the existing filter testing.
In any case, previously filter ids and parameters were of type
unsigned int. In order to support the more general zarr filter
model, this was all converted to char*. The old HDF5-specific,
unsigned int operations are still supported but they are
wrappers around the new, char* based nc_filterx_XXX functions.
This entailed at least the following changes:
1. Added the files libdispatch/dfilterx.c and include/ncfilter.h
2. Some filterx utilities have been moved to libdispatch/daux.c
3. A new entry, "filter_actions" was added to the NCDispatch table
and the version bumped.
4. An overly complex set of structs was created to support funnelling
all of the filterx operations thru a single dispatch
"filter_actions" entry.
5. Move common code to from libhdf5 to libsrc4 so that it is accessible
to nczarr.
Changes directly related to Zarr:
1. Modified CMakeList.txt and configure.ac to support both C and C++
-- this is in support of S3 support via the awd-sdk libraries.
2. Define a size64_t type to support nczarr.
3. More reworking of libdispatch/dinfermodel.c to
support zarr and to regularize the structure of the fragments
section of a URL.
Changes not directly related to Zarr:
1. Make client-side filter registration be conditional, with default off.
2. Hack include/nc4internal.h to make some flags added by Ed be unique:
e.g. NC_CREAT, NC_INDEF, etc.
3. cleanup include/nchttp.h and libdispatch/dhttp.c.
4. Misc. changes to support compiling under Visual Studio including:
* Better testing under windows for dirent.h and opendir and closedir.
5. Misc. changes to the oc2 code to support various libcurl CURLOPT flags
and to centralize error reporting.
6. By default, suppress the vlen tests that have unfixed memory leaks; add option to enable them.
7. Make part of the nc_test/test_byterange.sh test be contingent on remotetest.unidata.ucar.edu being accessible.
Changes Left TO-DO:
1. fix provenance code, it is too HDF5 specific.
2020-06-29 08:02:47 +08:00
|
|
|
ncurifragmentlookup(NCURI* uri, const char* key)
|
2012-08-09 07:15:18 +08:00
|
|
|
{
|
2017-03-09 08:01:10 +08:00
|
|
|
int i;
|
|
|
|
char* value = NULL;
|
This PR adds EXPERIMENTAL support for accessing data in the
cloud using a variant of the Zarr protocol and storage
format. This enhancement is generically referred to as "NCZarr".
The data model supported by NCZarr is netcdf-4 minus the user-defined
types and the String type. In this sense it is similar to the CDF-5
data model.
More detailed information about enabling and using NCZarr is
described in the document NUG/nczarr.md and in a
[Unidata Developer's blog entry](https://www.unidata.ucar.edu/blogs/developer/en/entry/overview-of-zarr-support-in).
WARNING: this code has had limited testing, so do use this version
for production work. Also, performance improvements are ongoing.
Note especially the following platform matrix of successful tests:
Platform | Build System | S3 support
------------------------------------
Linux+gcc | Automake | yes
Linux+gcc | CMake | yes
Visual Studio | CMake | no
Additionally, and as a consequence of the addition of NCZarr,
major changes have been made to the Filter API. NOTE: NCZarr
does not yet support filters, but these changes are enablers for
that support in the future. Note that it is possible
(probable?) that there will be some accidental reversions if the
changes here did not correctly mimic the existing filter testing.
In any case, previously filter ids and parameters were of type
unsigned int. In order to support the more general zarr filter
model, this was all converted to char*. The old HDF5-specific,
unsigned int operations are still supported but they are
wrappers around the new, char* based nc_filterx_XXX functions.
This entailed at least the following changes:
1. Added the files libdispatch/dfilterx.c and include/ncfilter.h
2. Some filterx utilities have been moved to libdispatch/daux.c
3. A new entry, "filter_actions" was added to the NCDispatch table
and the version bumped.
4. An overly complex set of structs was created to support funnelling
all of the filterx operations thru a single dispatch
"filter_actions" entry.
5. Move common code to from libhdf5 to libsrc4 so that it is accessible
to nczarr.
Changes directly related to Zarr:
1. Modified CMakeList.txt and configure.ac to support both C and C++
-- this is in support of S3 support via the awd-sdk libraries.
2. Define a size64_t type to support nczarr.
3. More reworking of libdispatch/dinfermodel.c to
support zarr and to regularize the structure of the fragments
section of a URL.
Changes not directly related to Zarr:
1. Make client-side filter registration be conditional, with default off.
2. Hack include/nc4internal.h to make some flags added by Ed be unique:
e.g. NC_CREAT, NC_INDEF, etc.
3. cleanup include/nchttp.h and libdispatch/dhttp.c.
4. Misc. changes to support compiling under Visual Studio including:
* Better testing under windows for dirent.h and opendir and closedir.
5. Misc. changes to the oc2 code to support various libcurl CURLOPT flags
and to centralize error reporting.
6. By default, suppress the vlen tests that have unfixed memory leaks; add option to enable them.
7. Make part of the nc_test/test_byterange.sh test be contingent on remotetest.unidata.ucar.edu being accessible.
Changes Left TO-DO:
1. fix provenance code, it is too HDF5 specific.
2020-06-29 08:02:47 +08:00
|
|
|
if(uri == NULL || key == NULL) return NULL;
|
|
|
|
ensurefraglist(uri);
|
2017-03-09 08:01:10 +08:00
|
|
|
i = ncfind(uri->fraglist,key);
|
|
|
|
if(i < 0)
|
|
|
|
return NULL;
|
|
|
|
value = uri->fraglist[(2*i)+1];
|
|
|
|
return value;
|
2012-08-09 07:15:18 +08:00
|
|
|
}
|
|
|
|
|
2017-03-09 08:01:10 +08:00
|
|
|
const char*
|
|
|
|
ncuriquerylookup(NCURI* uri, const char* key)
|
2012-08-09 07:15:18 +08:00
|
|
|
{
|
2016-04-13 01:18:34 +08:00
|
|
|
int i;
|
|
|
|
char* value = NULL;
|
2017-03-09 08:01:10 +08:00
|
|
|
if(uri == NULL || key == NULL || uri->querylist == NULL) return NULL;
|
|
|
|
i = ncfind(uri->querylist,key);
|
2016-04-13 01:18:34 +08:00
|
|
|
if(i < 0)
|
2017-03-09 08:01:10 +08:00
|
|
|
return NULL;
|
|
|
|
value = uri->querylist[(2*i)+1];
|
|
|
|
return value;
|
2012-08-09 07:15:18 +08:00
|
|
|
}
|
|
|
|
|
Provide byte-range reading of remote datasets
re: issue https://github.com/Unidata/netcdf-c/issues/1251
Assume that you have the URL to a remote dataset
which is a normal netcdf-3 or netcdf-4 file.
This PR allows the netcdf-c to read that dataset's
contents as a netcdf file using HTTP byte ranges
if the remote server supports byte-range access.
Originally, this PR was set up to access Amazon S3 objects,
but it can also access other remote datasets such as those
provided by a Thredds server via the HTTPServer access protocol.
It may also work for other kinds of servers.
Note that this is not intended as a true production
capability because, as is known, this kind of access
can be quite slow. In addition, the byte-range IO drivers
do not currently do any sort of optimization or caching.
An additional goal here is to gain some experience with
the Amazon S3 REST protocol.
This architecture and its use are documented in
the file docs/byterange.dox.
There are currently two test cases:
1. nc_test/tst_s3raw.c - this does a simple open, check format, close cycle
for a remote netcdf-3 file and a remote netcdf-4 file.
2. nc_test/test_s3raw.sh - this uses ncdump to investigate some remote
datasets.
This PR also incorporates significantly changed model inference code
(see the superseded PR https://github.com/Unidata/netcdf-c/pull/1259).
1. It centralizes the code that infers the dispatcher.
2. It adds support for byte-range URLs
Other changes:
1. NC_HDF5_finalize was not being properly called by nc_finalize().
2. Fix minor bug in ncgen3.l
3. fix memory leak in nc4info.c
4. add code to walk the .daprc triples and to replace protocol=
fragment tag with a more general mode= tag.
Final Note:
The inference code is still way too complicated. We need to move
to the validfile() model used by netcdf Java, where each
dispatcher is asked if it can process the file. This decentralizes
the inference code. This will be done after all the major new
dispatchers (PIO, Zarr, etc) have been implemented.
2019-01-02 09:27:36 +08:00
|
|
|
/* Obtain the complete list of fragment pairs in envv format */
|
|
|
|
const char**
|
|
|
|
ncurifragmentparams(NCURI* uri)
|
|
|
|
{
|
This PR adds EXPERIMENTAL support for accessing data in the
cloud using a variant of the Zarr protocol and storage
format. This enhancement is generically referred to as "NCZarr".
The data model supported by NCZarr is netcdf-4 minus the user-defined
types and the String type. In this sense it is similar to the CDF-5
data model.
More detailed information about enabling and using NCZarr is
described in the document NUG/nczarr.md and in a
[Unidata Developer's blog entry](https://www.unidata.ucar.edu/blogs/developer/en/entry/overview-of-zarr-support-in).
WARNING: this code has had limited testing, so do use this version
for production work. Also, performance improvements are ongoing.
Note especially the following platform matrix of successful tests:
Platform | Build System | S3 support
------------------------------------
Linux+gcc | Automake | yes
Linux+gcc | CMake | yes
Visual Studio | CMake | no
Additionally, and as a consequence of the addition of NCZarr,
major changes have been made to the Filter API. NOTE: NCZarr
does not yet support filters, but these changes are enablers for
that support in the future. Note that it is possible
(probable?) that there will be some accidental reversions if the
changes here did not correctly mimic the existing filter testing.
In any case, previously filter ids and parameters were of type
unsigned int. In order to support the more general zarr filter
model, this was all converted to char*. The old HDF5-specific,
unsigned int operations are still supported but they are
wrappers around the new, char* based nc_filterx_XXX functions.
This entailed at least the following changes:
1. Added the files libdispatch/dfilterx.c and include/ncfilter.h
2. Some filterx utilities have been moved to libdispatch/daux.c
3. A new entry, "filter_actions" was added to the NCDispatch table
and the version bumped.
4. An overly complex set of structs was created to support funnelling
all of the filterx operations thru a single dispatch
"filter_actions" entry.
5. Move common code to from libhdf5 to libsrc4 so that it is accessible
to nczarr.
Changes directly related to Zarr:
1. Modified CMakeList.txt and configure.ac to support both C and C++
-- this is in support of S3 support via the awd-sdk libraries.
2. Define a size64_t type to support nczarr.
3. More reworking of libdispatch/dinfermodel.c to
support zarr and to regularize the structure of the fragments
section of a URL.
Changes not directly related to Zarr:
1. Make client-side filter registration be conditional, with default off.
2. Hack include/nc4internal.h to make some flags added by Ed be unique:
e.g. NC_CREAT, NC_INDEF, etc.
3. cleanup include/nchttp.h and libdispatch/dhttp.c.
4. Misc. changes to support compiling under Visual Studio including:
* Better testing under windows for dirent.h and opendir and closedir.
5. Misc. changes to the oc2 code to support various libcurl CURLOPT flags
and to centralize error reporting.
6. By default, suppress the vlen tests that have unfixed memory leaks; add option to enable them.
7. Make part of the nc_test/test_byterange.sh test be contingent on remotetest.unidata.ucar.edu being accessible.
Changes Left TO-DO:
1. fix provenance code, it is too HDF5 specific.
2020-06-29 08:02:47 +08:00
|
|
|
ensurefraglist(uri);
|
Provide byte-range reading of remote datasets
re: issue https://github.com/Unidata/netcdf-c/issues/1251
Assume that you have the URL to a remote dataset
which is a normal netcdf-3 or netcdf-4 file.
This PR allows the netcdf-c to read that dataset's
contents as a netcdf file using HTTP byte ranges
if the remote server supports byte-range access.
Originally, this PR was set up to access Amazon S3 objects,
but it can also access other remote datasets such as those
provided by a Thredds server via the HTTPServer access protocol.
It may also work for other kinds of servers.
Note that this is not intended as a true production
capability because, as is known, this kind of access to
can be quite slow. In addition, the byte-range IO drivers
do not currently do any sort of optimization or caching.
An additional goal here is to gain some experience with
the Amazon S3 REST protocol.
This architecture and its use documented in
the file docs/byterange.dox.
There are currently two test cases:
1. nc_test/tst_s3raw.c - this does a simple open, check format, close cycle
for a remote netcdf-3 file and a remote netcdf-4 file.
2. nc_test/test_s3raw.sh - this uses ncdump to investigate some remote
datasets.
This PR also incorporates significantly changed model inference code
(see the superceded PR https://github.com/Unidata/netcdf-c/pull/1259).
1. It centralizes the code that infers the dispatcher.
2. It adds support for byte-range URLs
Other changes:
1. NC_HDF5_finalize was not being properly called by nc_finalize().
2. Fix minor bug in ncgen3.l
3. fix memory leak in nc4info.c
4. add code to walk the .daprc triples and to replace protocol=
fragment tag with a more general mode= tag.
Final Note:
Th inference code is still way too complicated. We need to move
to the validfile() model used by netcdf Java, where each
dispatcher is asked if it can process the file. This decentralizes
the inference code. This will be done after all the major new
dispatchers (PIO, Zarr, etc) have been implemented.
2019-01-02 09:27:36 +08:00
|
|
|
return (const char**)uri->fraglist;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Obtain the complete list of query pairs in envv format */
|
|
|
|
const char**
|
|
|
|
ncuriqueryparams(NCURI* uri)
|
|
|
|
{
|
This PR adds EXPERIMENTAL support for accessing data in the
cloud using a variant of the Zarr protocol and storage
format. This enhancement is generically referred to as "NCZarr".
The data model supported by NCZarr is netcdf-4 minus the user-defined
types and the String type. In this sense it is similar to the CDF-5
data model.
More detailed information about enabling and using NCZarr is
described in the document NUG/nczarr.md and in a
[Unidata Developer's blog entry](https://www.unidata.ucar.edu/blogs/developer/en/entry/overview-of-zarr-support-in).
WARNING: this code has had limited testing, so do use this version
for production work. Also, performance improvements are ongoing.
Note especially the following platform matrix of successful tests:
Platform | Build System | S3 support
------------------------------------
Linux+gcc | Automake | yes
Linux+gcc | CMake | yes
Visual Studio | CMake | no
Additionally, and as a consequence of the addition of NCZarr,
major changes have been made to the Filter API. NOTE: NCZarr
does not yet support filters, but these changes are enablers for
that support in the future. Note that it is possible
(probable?) that there will be some accidental reversions if the
changes here did not correctly mimic the existing filter testing.
In any case, previously filter ids and parameters were of type
unsigned int. In order to support the more general zarr filter
model, this was all converted to char*. The old HDF5-specific,
unsigned int operations are still supported but they are
wrappers around the new, char* based nc_filterx_XXX functions.
This entailed at least the following changes:
1. Added the files libdispatch/dfilterx.c and include/ncfilter.h
2. Some filterx utilities have been moved to libdispatch/daux.c
3. A new entry, "filter_actions" was added to the NCDispatch table
and the version bumped.
4. An overly complex set of structs was created to support funnelling
all of the filterx operations thru a single dispatch
"filter_actions" entry.
5. Move common code to from libhdf5 to libsrc4 so that it is accessible
to nczarr.
Changes directly related to Zarr:
1. Modified CMakeList.txt and configure.ac to support both C and C++
-- this is in support of S3 support via the awd-sdk libraries.
2. Define a size64_t type to support nczarr.
3. More reworking of libdispatch/dinfermodel.c to
support zarr and to regularize the structure of the fragments
section of a URL.
Changes not directly related to Zarr:
1. Make client-side filter registration be conditional, with default off.
2. Hack include/nc4internal.h to make some flags added by Ed be unique:
e.g. NC_CREAT, NC_INDEF, etc.
3. cleanup include/nchttp.h and libdispatch/dhttp.c.
4. Misc. changes to support compiling under Visual Studio including:
* Better testing under windows for dirent.h and opendir and closedir.
5. Misc. changes to the oc2 code to support various libcurl CURLOPT flags
and to centralize error reporting.
6. By default, suppress the vlen tests that have unfixed memory leaks; add option to enable them.
7. Make part of the nc_test/test_byterange.sh test be contingent on remotetest.unidata.ucar.edu being accessible.
Changes Left TO-DO:
1. fix provenance code, it is too HDF5 specific.
2020-06-29 08:02:47 +08:00
|
|
|
ensurequerylist(uri);
|
Provide byte-range reading of remote datasets
re: issue https://github.com/Unidata/netcdf-c/issues/1251
Assume that you have the URL to a remote dataset
which is a normal netcdf-3 or netcdf-4 file.
This PR allows the netcdf-c to read that dataset's
contents as a netcdf file using HTTP byte ranges
if the remote server supports byte-range access.
Originally, this PR was set up to access Amazon S3 objects,
but it can also access other remote datasets such as those
provided by a Thredds server via the HTTPServer access protocol.
It may also work for other kinds of servers.
Note that this is not intended as a true production
capability because, as is known, this kind of access to
can be quite slow. In addition, the byte-range IO drivers
do not currently do any sort of optimization or caching.
An additional goal here is to gain some experience with
the Amazon S3 REST protocol.
This architecture and its use documented in
the file docs/byterange.dox.
There are currently two test cases:
1. nc_test/tst_s3raw.c - this does a simple open, check format, close cycle
for a remote netcdf-3 file and a remote netcdf-4 file.
2. nc_test/test_s3raw.sh - this uses ncdump to investigate some remote
datasets.
This PR also incorporates significantly changed model inference code
(see the superceded PR https://github.com/Unidata/netcdf-c/pull/1259).
1. It centralizes the code that infers the dispatcher.
2. It adds support for byte-range URLs
Other changes:
1. NC_HDF5_finalize was not being properly called by nc_finalize().
2. Fix minor bug in ncgen3.l
3. fix memory leak in nc4info.c
4. add code to walk the .daprc triples and to replace protocol=
fragment tag with a more general mode= tag.
Final Note:
Th inference code is still way too complicated. We need to move
to the validfile() model used by netcdf Java, where each
dispatcher is asked if it can process the file. This decentralizes
the inference code. This will be done after all the major new
dispatchers (PIO, Zarr, etc) have been implemented.
2019-01-02 09:27:36 +08:00
|
|
|
return (const char**)uri->querylist;
|
|
|
|
}
|
|
|
|
|
2017-03-09 08:01:10 +08:00
|
|
|
#if 0
|
2012-08-09 07:15:18 +08:00
|
|
|
int
|
2017-03-09 08:01:10 +08:00
|
|
|
ncuriremoveparam(NCURI* uri, const char* key)
|
2012-08-09 07:15:18 +08:00
|
|
|
{
|
2017-03-09 08:01:10 +08:00
|
|
|
char** p;
|
|
|
|
char** q = NULL;
|
|
|
|
|
2019-09-30 02:59:28 +08:00
|
|
|
if(uri->fraglist == NULL) return NC_NOERR;
|
2017-03-09 08:01:10 +08:00
|
|
|
for(q=uri->fraglist,p=uri->fraglist;*p;) {
|
|
|
|
if(strcmp(key,*p)==0) {
|
2018-06-09 02:08:33 +08:00
|
|
|
p += 2; /* skip this entry */
|
2017-03-09 08:01:10 +08:00
|
|
|
} else {
|
|
|
|
*q++ = *p++; /* move key */
|
|
|
|
*q++ = *p++; /* move value */
|
|
|
|
}
|
|
|
|
}
|
2019-09-30 02:59:28 +08:00
|
|
|
return NC_NOERR;
|
2012-08-09 07:15:18 +08:00
|
|
|
}
|
2017-03-09 08:01:10 +08:00
|
|
|
#endif
|
|
|
|
|
2012-08-09 07:15:18 +08:00
|
|
|
|
2017-11-09 10:02:13 +08:00
|
|
|
/* Internal lookup: search a NULL-terminated vector of alternating
   key/value strings for 'key', ignoring case.
   Returns the index of the matching pair (0 for the first pair,
   1 for the second, ...), or -1 if key is NULL, params is NULL,
   or no key matches.
*/
static int
ncfind(char** params, const char* key)
{
    int pairno = 0;
    char** cursor = params;

    if(key == NULL || params == NULL)
        return -1;
    while(*cursor != NULL) {
        if(strcasecmp(key,*cursor) == 0)
            return pairno;
        cursor += 2; /* step over the value to reach the next key */
        pairno++;
    }
    return -1;
}
|
|
|
|
|
2017-03-09 08:01:10 +08:00
|
|
|
|
|
|
|
#if 0
|
2012-08-09 07:15:18 +08:00
|
|
|
/* Free a NULL-terminated vector of key/value string pairs:
   each key, each value, and finally the vector itself.
   A NULL vector is ignored. */
static void
ncparamfree(char** params)
{
    size_t i;

    if(params == NULL)
        return;
    for(i=0;params[i] != NULL;i+=2) {
        free(params[i]);   /* key */
        free(params[i+1]); /* value; may be NULL, free(NULL) is a no-op */
    }
    free(params);
}
|
2017-03-09 08:01:10 +08:00
|
|
|
#endif
|
2012-08-09 07:15:18 +08:00
|
|
|
|
|
|
|
/* Return a pointer to the first unescaped occurrence in p of any
   character in charlist; return NULL if there is no such occurrence.
   A backslash escapes the character that follows it, so that
   character is never reported as a match.
*/
static char*
nclocate(char* p, const char* charlist)
{
    for(;*p;p++) {
        if(*p == '\\') {
            p++; /* skip the escaped character */
            /* A trailing backslash has nothing to escape; stop here so
               the loop increment cannot step past the terminating NUL. */
            if(*p == '\0') break;
        } else if(strchr(charlist,*p) != NULL)
            return p;
    }
    return NULL;
}
|
|
|
|
|
2017-03-09 08:01:10 +08:00
|
|
|
#if 0
|
2012-08-09 07:15:18 +08:00
|
|
|
/* Delete the character at p by moving the remainder of the string,
   including its terminating NUL, one place to the left.
   Does nothing when p is NULL or points at an empty string. */
static void
nclshift1(char* p)
{
    if(p == NULL || *p == '\0')
        return;
    /* strlen(p) == number of bytes in p+1 including its NUL */
    memmove(p,p+1,strlen(p));
}
|
|
|
|
|
|
|
|
/* Open a one-character gap at p by moving the string, including its
   terminating NUL, one place to the right. The vacated first position
   is set to NUL. The buffer must have room for one byte beyond the
   current terminator, because that byte is written. */
static void
ncrshift1(char* p)
{
    size_t len = strlen(p);

    memmove(p+1,p,len+1); /* text plus terminator */
    p[0] = '\0';
}
|
2017-03-09 08:01:10 +08:00
|
|
|
#endif
|
2012-08-09 07:15:18 +08:00
|
|
|
|
|
|
|
/* Provide % encoders and decoders */
|
|
|
|
|
2019-03-31 04:06:20 +08:00
|
|
|
/* Characters accepted as hex digits. toHex() indexes only the first 16
   entries, while ncuridecode() searches the whole string with strchr()
   to test whether a character is a hex digit (either case). */
static const char* hexchars = "0123456789abcdefABCDEF";
|
2012-08-09 07:15:18 +08:00
|
|
|
|
|
|
|
static void
|
2014-03-11 00:01:44 +08:00
|
|
|
toHex(unsigned int b, char hex[2])
|
2012-08-09 07:15:18 +08:00
|
|
|
{
|
2017-08-30 04:11:15 +08:00
|
|
|
hex[0] = hexchars[(b >> 4) & 0xf];
|
|
|
|
hex[1] = hexchars[(b) & 0xf];
|
2012-08-09 07:15:18 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2014-03-11 02:09:36 +08:00
|
|
|
/* Convert one hex digit character (either case) to its value 0..15.
   Any character that is not a hex digit yields 0. */
static int
fromHex(int c)
{
    int value = 0;

    if('0' <= c && c <= '9')
        value = c - '0';
    else if('a' <= c && c <= 'f')
        value = (c - 'a') + 10;
    else if('A' <= c && c <= 'F')
        value = (c - 'A') + 10;
    return value;
}
|
|
|
|
|
2017-08-30 04:11:15 +08:00
|
|
|
/*
|
|
|
|
Support encode of user and password fields
|
|
|
|
*/
|
|
|
|
char*
|
This PR adds EXPERIMENTAL support for accessing data in the
cloud using a variant of the Zarr protocol and storage
format. This enhancement is generically referred to as "NCZarr".
The data model supported by NCZarr is netcdf-4 minus the user-defined
types and the String type. In this sense it is similar to the CDF-5
data model.
More detailed information about enabling and using NCZarr is
described in the document NUG/nczarr.md and in a
[Unidata Developer's blog entry](https://www.unidata.ucar.edu/blogs/developer/en/entry/overview-of-zarr-support-in).
WARNING: this code has had limited testing, so do use this version
for production work. Also, performance improvements are ongoing.
Note especially the following platform matrix of successful tests:
Platform | Build System | S3 support
------------------------------------
Linux+gcc | Automake | yes
Linux+gcc | CMake | yes
Visual Studio | CMake | no
Additionally, and as a consequence of the addition of NCZarr,
major changes have been made to the Filter API. NOTE: NCZarr
does not yet support filters, but these changes are enablers for
that support in the future. Note that it is possible
(probable?) that there will be some accidental reversions if the
changes here did not correctly mimic the existing filter testing.
In any case, previously filter ids and parameters were of type
unsigned int. In order to support the more general zarr filter
model, this was all converted to char*. The old HDF5-specific,
unsigned int operations are still supported but they are
wrappers around the new, char* based nc_filterx_XXX functions.
This entailed at least the following changes:
1. Added the files libdispatch/dfilterx.c and include/ncfilter.h
2. Some filterx utilities have been moved to libdispatch/daux.c
3. A new entry, "filter_actions" was added to the NCDispatch table
and the version bumped.
4. An overly complex set of structs was created to support funnelling
all of the filterx operations thru a single dispatch
"filter_actions" entry.
5. Move common code to from libhdf5 to libsrc4 so that it is accessible
to nczarr.
Changes directly related to Zarr:
1. Modified CMakeList.txt and configure.ac to support both C and C++
-- this is in support of S3 support via the awd-sdk libraries.
2. Define a size64_t type to support nczarr.
3. More reworking of libdispatch/dinfermodel.c to
support zarr and to regularize the structure of the fragments
section of a URL.
Changes not directly related to Zarr:
1. Make client-side filter registration be conditional, with default off.
2. Hack include/nc4internal.h to make some flags added by Ed be unique:
e.g. NC_CREAT, NC_INDEF, etc.
3. cleanup include/nchttp.h and libdispatch/dhttp.c.
4. Misc. changes to support compiling under Visual Studio including:
* Better testing under windows for dirent.h and opendir and closedir.
5. Misc. changes to the oc2 code to support various libcurl CURLOPT flags
and to centralize error reporting.
6. By default, suppress the vlen tests that have unfixed memory leaks; add option to enable them.
7. Make part of the nc_test/test_byterange.sh test be contingent on remotetest.unidata.ucar.edu being accessible.
Changes Left TO-DO:
1. fix provenance code, it is too HDF5 specific.
2020-06-29 08:02:47 +08:00
|
|
|
ncuriencodeuserpwd(const char* s)
|
2017-08-30 04:11:15 +08:00
|
|
|
{
|
|
|
|
return ncuriencodeonly(s,userpwdallow);
|
|
|
|
}
|
2012-08-09 07:15:18 +08:00
|
|
|
|
|
|
|
/* Return a string representing encoding of input; caller must free;
|
|
|
|
watch out: will encode whole string, so watch what you give it.
|
|
|
|
Allowable argument specifies characters that do not need escaping.
|
|
|
|
*/
|
|
|
|
|
|
|
|
char*
|
This PR adds EXPERIMENTAL support for accessing data in the
cloud using a variant of the Zarr protocol and storage
format. This enhancement is generically referred to as "NCZarr".
The data model supported by NCZarr is netcdf-4 minus the user-defined
types and the String type. In this sense it is similar to the CDF-5
data model.
More detailed information about enabling and using NCZarr is
described in the document NUG/nczarr.md and in a
[Unidata Developer's blog entry](https://www.unidata.ucar.edu/blogs/developer/en/entry/overview-of-zarr-support-in).
WARNING: this code has had limited testing, so do use this version
for production work. Also, performance improvements are ongoing.
Note especially the following platform matrix of successful tests:
Platform | Build System | S3 support
------------------------------------
Linux+gcc | Automake | yes
Linux+gcc | CMake | yes
Visual Studio | CMake | no
Additionally, and as a consequence of the addition of NCZarr,
major changes have been made to the Filter API. NOTE: NCZarr
does not yet support filters, but these changes are enablers for
that support in the future. Note that it is possible
(probable?) that there will be some accidental reversions if the
changes here did not correctly mimic the existing filter testing.
In any case, previously filter ids and parameters were of type
unsigned int. In order to support the more general zarr filter
model, this was all converted to char*. The old HDF5-specific,
unsigned int operations are still supported but they are
wrappers around the new, char* based nc_filterx_XXX functions.
This entailed at least the following changes:
1. Added the files libdispatch/dfilterx.c and include/ncfilter.h
2. Some filterx utilities have been moved to libdispatch/daux.c
3. A new entry, "filter_actions" was added to the NCDispatch table
and the version bumped.
4. An overly complex set of structs was created to support funnelling
all of the filterx operations thru a single dispatch
"filter_actions" entry.
5. Move common code to from libhdf5 to libsrc4 so that it is accessible
to nczarr.
Changes directly related to Zarr:
1. Modified CMakeList.txt and configure.ac to support both C and C++
-- this is in support of S3 support via the awd-sdk libraries.
2. Define a size64_t type to support nczarr.
3. More reworking of libdispatch/dinfermodel.c to
support zarr and to regularize the structure of the fragments
section of a URL.
Changes not directly related to Zarr:
1. Make client-side filter registration be conditional, with default off.
2. Hack include/nc4internal.h to make some flags added by Ed be unique:
e.g. NC_CREAT, NC_INDEF, etc.
3. cleanup include/nchttp.h and libdispatch/dhttp.c.
4. Misc. changes to support compiling under Visual Studio including:
* Better testing under windows for dirent.h and opendir and closedir.
5. Misc. changes to the oc2 code to support various libcurl CURLOPT flags
and to centralize error reporting.
6. By default, suppress the vlen tests that have unfixed memory leaks; add option to enable them.
7. Make part of the nc_test/test_byterange.sh test be contingent on remotetest.unidata.ucar.edu being accessible.
Changes Left TO-DO:
1. fix provenance code, it is too HDF5 specific.
2020-06-29 08:02:47 +08:00
|
|
|
ncuriencodeonly(const char* s, const char* allowable)
|
2012-08-09 07:15:18 +08:00
|
|
|
{
|
|
|
|
size_t slen;
|
|
|
|
char* encoded;
|
This PR adds EXPERIMENTAL support for accessing data in the
cloud using a variant of the Zarr protocol and storage
format. This enhancement is generically referred to as "NCZarr".
The data model supported by NCZarr is netcdf-4 minus the user-defined
types and the String type. In this sense it is similar to the CDF-5
data model.
More detailed information about enabling and using NCZarr is
described in the document NUG/nczarr.md and in a
[Unidata Developer's blog entry](https://www.unidata.ucar.edu/blogs/developer/en/entry/overview-of-zarr-support-in).
WARNING: this code has had limited testing, so do use this version
for production work. Also, performance improvements are ongoing.
Note especially the following platform matrix of successful tests:
Platform | Build System | S3 support
------------------------------------
Linux+gcc | Automake | yes
Linux+gcc | CMake | yes
Visual Studio | CMake | no
Additionally, and as a consequence of the addition of NCZarr,
major changes have been made to the Filter API. NOTE: NCZarr
does not yet support filters, but these changes are enablers for
that support in the future. Note that it is possible
(probable?) that there will be some accidental reversions if the
changes here did not correctly mimic the existing filter testing.
In any case, previously filter ids and parameters were of type
unsigned int. In order to support the more general zarr filter
model, this was all converted to char*. The old HDF5-specific,
unsigned int operations are still supported but they are
wrappers around the new, char* based nc_filterx_XXX functions.
This entailed at least the following changes:
1. Added the files libdispatch/dfilterx.c and include/ncfilter.h
2. Some filterx utilities have been moved to libdispatch/daux.c
3. A new entry, "filter_actions" was added to the NCDispatch table
and the version bumped.
4. An overly complex set of structs was created to support funnelling
all of the filterx operations thru a single dispatch
"filter_actions" entry.
5. Move common code to from libhdf5 to libsrc4 so that it is accessible
to nczarr.
Changes directly related to Zarr:
1. Modified CMakeList.txt and configure.ac to support both C and C++
-- this is in support of S3 support via the awd-sdk libraries.
2. Define a size64_t type to support nczarr.
3. More reworking of libdispatch/dinfermodel.c to
support zarr and to regularize the structure of the fragments
section of a URL.
Changes not directly related to Zarr:
1. Make client-side filter registration be conditional, with default off.
2. Hack include/nc4internal.h to make some flags added by Ed be unique:
e.g. NC_CREAT, NC_INDEF, etc.
3. cleanup include/nchttp.h and libdispatch/dhttp.c.
4. Misc. changes to support compiling under Visual Studio including:
* Better testing under windows for dirent.h and opendir and closedir.
5. Misc. changes to the oc2 code to support various libcurl CURLOPT flags
and to centralize error reporting.
6. By default, suppress the vlen tests that have unfixed memory leaks; add option to enable them.
7. Make part of the nc_test/test_byterange.sh test be contingent on remotetest.unidata.ucar.edu being accessible.
Changes Left TO-DO:
1. fix provenance code, it is too HDF5 specific.
2020-06-29 08:02:47 +08:00
|
|
|
const char* inptr;
|
2012-08-09 07:15:18 +08:00
|
|
|
char* outptr;
|
|
|
|
|
|
|
|
if(s == NULL) return NULL;
|
|
|
|
|
|
|
|
slen = strlen(s);
|
|
|
|
encoded = (char*)malloc((3*slen) + 1); /* max possible size */
|
|
|
|
|
|
|
|
for(inptr=s,outptr=encoded;*inptr;) {
|
|
|
|
int c = *inptr++;
|
2020-09-09 02:41:12 +08:00
|
|
|
{
|
2012-08-09 07:15:18 +08:00
|
|
|
/* search allowable */
|
2017-08-30 04:11:15 +08:00
|
|
|
char* p = strchr(allowable,c);
|
|
|
|
if(p != NULL) {
|
|
|
|
*outptr++ = (char)c;
|
|
|
|
} else {
|
2012-08-09 07:15:18 +08:00
|
|
|
char hex[2];
|
|
|
|
toHex(c,hex);
|
|
|
|
*outptr++ = '%';
|
|
|
|
*outptr++ = hex[0];
|
|
|
|
*outptr++ = hex[1];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
*outptr = EOFCHAR;
|
|
|
|
return encoded;
|
|
|
|
}
|
|
|
|
|
2017-08-30 08:01:26 +08:00
|
|
|
/* Return a string representing decoding of input; caller must free;*/
|
2012-08-09 07:15:18 +08:00
|
|
|
char*
|
This PR adds EXPERIMENTAL support for accessing data in the
cloud using a variant of the Zarr protocol and storage
format. This enhancement is generically referred to as "NCZarr".
The data model supported by NCZarr is netcdf-4 minus the user-defined
types and the String type. In this sense it is similar to the CDF-5
data model.
More detailed information about enabling and using NCZarr is
described in the document NUG/nczarr.md and in a
[Unidata Developer's blog entry](https://www.unidata.ucar.edu/blogs/developer/en/entry/overview-of-zarr-support-in).
WARNING: this code has had limited testing, so do use this version
for production work. Also, performance improvements are ongoing.
Note especially the following platform matrix of successful tests:
Platform | Build System | S3 support
------------------------------------
Linux+gcc | Automake | yes
Linux+gcc | CMake | yes
Visual Studio | CMake | no
Additionally, and as a consequence of the addition of NCZarr,
major changes have been made to the Filter API. NOTE: NCZarr
does not yet support filters, but these changes are enablers for
that support in the future. Note that it is possible
(probable?) that there will be some accidental reversions if the
changes here did not correctly mimic the existing filter testing.
In any case, previously filter ids and parameters were of type
unsigned int. In order to support the more general zarr filter
model, this was all converted to char*. The old HDF5-specific,
unsigned int operations are still supported but they are
wrappers around the new, char* based nc_filterx_XXX functions.
This entailed at least the following changes:
1. Added the files libdispatch/dfilterx.c and include/ncfilter.h
2. Some filterx utilities have been moved to libdispatch/daux.c
3. A new entry, "filter_actions" was added to the NCDispatch table
and the version bumped.
4. An overly complex set of structs was created to support funnelling
all of the filterx operations thru a single dispatch
"filter_actions" entry.
5. Move common code to from libhdf5 to libsrc4 so that it is accessible
to nczarr.
Changes directly related to Zarr:
1. Modified CMakeList.txt and configure.ac to support both C and C++
-- this is in support of S3 support via the awd-sdk libraries.
2. Define a size64_t type to support nczarr.
3. More reworking of libdispatch/dinfermodel.c to
support zarr and to regularize the structure of the fragments
section of a URL.
Changes not directly related to Zarr:
1. Make client-side filter registration be conditional, with default off.
2. Hack include/nc4internal.h to make some flags added by Ed be unique:
e.g. NC_CREAT, NC_INDEF, etc.
3. cleanup include/nchttp.h and libdispatch/dhttp.c.
4. Misc. changes to support compiling under Visual Studio including:
* Better testing under windows for dirent.h and opendir and closedir.
5. Misc. changes to the oc2 code to support various libcurl CURLOPT flags
and to centralize error reporting.
6. By default, suppress the vlen tests that have unfixed memory leaks; add option to enable them.
7. Make part of the nc_test/test_byterange.sh test be contingent on remotetest.unidata.ucar.edu being accessible.
Changes Left TO-DO:
1. fix provenance code, it is too HDF5 specific.
2020-06-29 08:02:47 +08:00
|
|
|
ncuridecode(const char* s)
|
2012-08-09 07:15:18 +08:00
|
|
|
{
|
|
|
|
size_t slen;
|
|
|
|
char* decoded;
|
|
|
|
char* outptr;
|
This PR adds EXPERIMENTAL support for accessing data in the
cloud using a variant of the Zarr protocol and storage
format. This enhancement is generically referred to as "NCZarr".
The data model supported by NCZarr is netcdf-4 minus the user-defined
types and the String type. In this sense it is similar to the CDF-5
data model.
More detailed information about enabling and using NCZarr is
described in the document NUG/nczarr.md and in a
[Unidata Developer's blog entry](https://www.unidata.ucar.edu/blogs/developer/en/entry/overview-of-zarr-support-in).
WARNING: this code has had limited testing, so do use this version
for production work. Also, performance improvements are ongoing.
Note especially the following platform matrix of successful tests:
Platform | Build System | S3 support
------------------------------------
Linux+gcc | Automake | yes
Linux+gcc | CMake | yes
Visual Studio | CMake | no
Additionally, and as a consequence of the addition of NCZarr,
major changes have been made to the Filter API. NOTE: NCZarr
does not yet support filters, but these changes are enablers for
that support in the future. Note that it is possible
(probable?) that there will be some accidental reversions if the
changes here did not correctly mimic the existing filter testing.
In any case, previously filter ids and parameters were of type
unsigned int. In order to support the more general zarr filter
model, this was all converted to char*. The old HDF5-specific,
unsigned int operations are still supported but they are
wrappers around the new, char* based nc_filterx_XXX functions.
This entailed at least the following changes:
1. Added the files libdispatch/dfilterx.c and include/ncfilter.h
2. Some filterx utilities have been moved to libdispatch/daux.c
3. A new entry, "filter_actions" was added to the NCDispatch table
and the version bumped.
4. An overly complex set of structs was created to support funnelling
all of the filterx operations thru a single dispatch
"filter_actions" entry.
5. Move common code to from libhdf5 to libsrc4 so that it is accessible
to nczarr.
Changes directly related to Zarr:
1. Modified CMakeList.txt and configure.ac to support both C and C++
-- this is in support of S3 support via the awd-sdk libraries.
2. Define a size64_t type to support nczarr.
3. More reworking of libdispatch/dinfermodel.c to
support zarr and to regularize the structure of the fragments
section of a URL.
Changes not directly related to Zarr:
1. Make client-side filter registration be conditional, with default off.
2. Hack include/nc4internal.h to make some flags added by Ed be unique:
e.g. NC_CREAT, NC_INDEF, etc.
3. cleanup include/nchttp.h and libdispatch/dhttp.c.
4. Misc. changes to support compiling under Visual Studio including:
* Better testing under windows for dirent.h and opendir and closedir.
5. Misc. changes to the oc2 code to support various libcurl CURLOPT flags
and to centralize error reporting.
6. By default, suppress the vlen tests that have unfixed memory leaks; add option to enable them.
7. Make part of the nc_test/test_byterange.sh test be contingent on remotetest.unidata.ucar.edu being accessible.
Changes Left TO-DO:
1. fix provenance code, it is too HDF5 specific.
2020-06-29 08:02:47 +08:00
|
|
|
const char* inptr;
|
2012-08-09 07:15:18 +08:00
|
|
|
unsigned int c;
|
2014-08-02 00:47:20 +08:00
|
|
|
|
2012-08-09 07:15:18 +08:00
|
|
|
if (s == NULL) return NULL;
|
|
|
|
|
|
|
|
slen = strlen(s);
|
|
|
|
decoded = (char*)malloc(slen+1); /* Should be max we need */
|
|
|
|
|
|
|
|
outptr = decoded;
|
|
|
|
inptr = s;
|
2014-03-11 02:09:36 +08:00
|
|
|
while((c = (unsigned int)*inptr++)) {
|
2017-08-30 04:11:15 +08:00
|
|
|
if(c == '%') {
|
2012-08-09 07:15:18 +08:00
|
|
|
/* try to pull two hex more characters */
|
|
|
|
if(inptr[0] != EOFCHAR && inptr[1] != EOFCHAR
|
|
|
|
&& strchr(hexchars,inptr[0]) != NULL
|
|
|
|
&& strchr(hexchars,inptr[1]) != NULL) {
|
|
|
|
/* test conversion */
|
|
|
|
int xc = (fromHex(inptr[0]) << 4) | (fromHex(inptr[1]));
|
2017-08-30 04:11:15 +08:00
|
|
|
inptr += 2; /* decode it */
|
|
|
|
c = (unsigned int)xc;
|
2012-08-09 07:15:18 +08:00
|
|
|
}
|
|
|
|
}
|
2014-03-11 02:09:36 +08:00
|
|
|
*outptr++ = (char)c;
|
2017-08-30 08:01:26 +08:00
|
|
|
}
|
|
|
|
*outptr = EOFCHAR;
|
|
|
|
return decoded;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
Partially decode a string. Only characters in 'decodeset'
|
|
|
|
are decoded. Return decoded string; caller must free.
|
|
|
|
*/
|
|
|
|
char*
|
This PR adds EXPERIMENTAL support for accessing data in the
cloud using a variant of the Zarr protocol and storage
format. This enhancement is generically referred to as "NCZarr".
The data model supported by NCZarr is netcdf-4 minus the user-defined
types and the String type. In this sense it is similar to the CDF-5
data model.
More detailed information about enabling and using NCZarr is
described in the document NUG/nczarr.md and in a
[Unidata Developer's blog entry](https://www.unidata.ucar.edu/blogs/developer/en/entry/overview-of-zarr-support-in).
WARNING: this code has had limited testing, so do not use this version
for production work. Also, performance improvements are ongoing.
Note especially the following platform matrix of successful tests:
Platform | Build System | S3 support
------------------------------------
Linux+gcc | Automake | yes
Linux+gcc | CMake | yes
Visual Studio | CMake | no
Additionally, and as a consequence of the addition of NCZarr,
major changes have been made to the Filter API. NOTE: NCZarr
does not yet support filters, but these changes are enablers for
that support in the future. Note that it is possible
(probable?) that there will be some accidental reversions if the
changes here did not correctly mimic the existing filter testing.
In any case, previously filter ids and parameters were of type
unsigned int. In order to support the more general zarr filter
model, this was all converted to char*. The old HDF5-specific,
unsigned int operations are still supported but they are
wrappers around the new, char* based nc_filterx_XXX functions.
This entailed at least the following changes:
1. Added the files libdispatch/dfilterx.c and include/ncfilter.h
2. Some filterx utilities have been moved to libdispatch/daux.c
3. A new entry, "filter_actions" was added to the NCDispatch table
and the version bumped.
4. An overly complex set of structs was created to support funnelling
all of the filterx operations thru a single dispatch
"filter_actions" entry.
5. Move common code from libhdf5 to libsrc4 so that it is accessible
to nczarr.
Changes directly related to Zarr:
1. Modified CMakeList.txt and configure.ac to support both C and C++
-- this is in support of S3 support via the aws-sdk libraries.
2. Define a size64_t type to support nczarr.
3. More reworking of libdispatch/dinfermodel.c to
support zarr and to regularize the structure of the fragments
section of a URL.
Changes not directly related to Zarr:
1. Make client-side filter registration be conditional, with default off.
2. Hack include/nc4internal.h to make some flags added by Ed be unique:
e.g. NC_CREAT, NC_INDEF, etc.
3. cleanup include/nchttp.h and libdispatch/dhttp.c.
4. Misc. changes to support compiling under Visual Studio including:
* Better testing under windows for dirent.h and opendir and closedir.
5. Misc. changes to the oc2 code to support various libcurl CURLOPT flags
and to centralize error reporting.
6. By default, suppress the vlen tests that have unfixed memory leaks; add option to enable them.
7. Make part of the nc_test/test_byterange.sh test be contingent on remotetest.unidata.ucar.edu being accessible.
Changes Left TO-DO:
1. fix provenance code, it is too HDF5 specific.
2020-06-29 08:02:47 +08:00
|
|
|
ncuridecodepartial(const char* s, const char* decodeset)
|
2017-08-30 08:01:26 +08:00
|
|
|
{
|
|
|
|
size_t slen;
|
|
|
|
char* decoded;
|
|
|
|
char* outptr;
|
This PR adds EXPERIMENTAL support for accessing data in the
cloud using a variant of the Zarr protocol and storage
format. This enhancement is generically referred to as "NCZarr".
The data model supported by NCZarr is netcdf-4 minus the user-defined
types and the String type. In this sense it is similar to the CDF-5
data model.
More detailed information about enabling and using NCZarr is
described in the document NUG/nczarr.md and in a
[Unidata Developer's blog entry](https://www.unidata.ucar.edu/blogs/developer/en/entry/overview-of-zarr-support-in).
WARNING: this code has had limited testing, so do not use this version
for production work. Also, performance improvements are ongoing.
Note especially the following platform matrix of successful tests:
Platform | Build System | S3 support
------------------------------------
Linux+gcc | Automake | yes
Linux+gcc | CMake | yes
Visual Studio | CMake | no
Additionally, and as a consequence of the addition of NCZarr,
major changes have been made to the Filter API. NOTE: NCZarr
does not yet support filters, but these changes are enablers for
that support in the future. Note that it is possible
(probable?) that there will be some accidental reversions if the
changes here did not correctly mimic the existing filter testing.
In any case, previously filter ids and parameters were of type
unsigned int. In order to support the more general zarr filter
model, this was all converted to char*. The old HDF5-specific,
unsigned int operations are still supported but they are
wrappers around the new, char* based nc_filterx_XXX functions.
This entailed at least the following changes:
1. Added the files libdispatch/dfilterx.c and include/ncfilter.h
2. Some filterx utilities have been moved to libdispatch/daux.c
3. A new entry, "filter_actions" was added to the NCDispatch table
and the version bumped.
4. An overly complex set of structs was created to support funnelling
all of the filterx operations thru a single dispatch
"filter_actions" entry.
5. Move common code from libhdf5 to libsrc4 so that it is accessible
to nczarr.
Changes directly related to Zarr:
1. Modified CMakeLists.txt and configure.ac to support both C and C++
-- this is in support of S3 support via the aws-sdk libraries.
2. Define a size64_t type to support nczarr.
3. More reworking of libdispatch/dinfermodel.c to
support zarr and to regularize the structure of the fragments
section of a URL.
Changes not directly related to Zarr:
1. Make client-side filter registration be conditional, with default off.
2. Hack include/nc4internal.h to make some flags added by Ed be unique:
e.g. NC_CREAT, NC_INDEF, etc.
3. cleanup include/nchttp.h and libdispatch/dhttp.c.
4. Misc. changes to support compiling under Visual Studio including:
* Better testing under windows for dirent.h and opendir and closedir.
5. Misc. changes to the oc2 code to support various libcurl CURLOPT flags
and to centralize error reporting.
6. By default, suppress the vlen tests that have unfixed memory leaks; add option to enable them.
7. Make part of the nc_test/test_byterange.sh test be contingent on remotetest.unidata.ucar.edu being accessible.
Changes Left TO-DO:
1. fix provenance code, it is too HDF5 specific.
2020-06-29 08:02:47 +08:00
|
|
|
const char* inptr;
|
2017-08-30 08:01:26 +08:00
|
|
|
unsigned int c;
|
|
|
|
|
|
|
|
if (s == NULL || decodeset == NULL) return NULL;
|
|
|
|
|
|
|
|
slen = strlen(s);
|
|
|
|
decoded = (char*)malloc(slen+1); /* Should be max we need */
|
|
|
|
|
|
|
|
outptr = decoded;
|
|
|
|
inptr = s;
|
|
|
|
while((c = (unsigned int)*inptr++)) {
|
|
|
|
if(c == '+' && strchr(decodeset,'+') != NULL)
|
|
|
|
*outptr++ = ' ';
|
|
|
|
else if(c == '%') {
|
|
|
|
/* try to pull two hex more characters */
|
|
|
|
if(inptr[0] != EOFCHAR && inptr[1] != EOFCHAR
|
|
|
|
&& strchr(hexchars,inptr[0]) != NULL
|
|
|
|
&& strchr(hexchars,inptr[1]) != NULL) {
|
|
|
|
/* test conversion */
|
|
|
|
int xc = (fromHex(inptr[0]) << 4) | (fromHex(inptr[1]));
|
|
|
|
if(strchr(decodeset,xc) != NULL) {
|
|
|
|
inptr += 2; /* decode it */
|
|
|
|
c = (unsigned int)xc;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
*outptr++ = (char)c; /* pass either the % or decoded char */
|
|
|
|
} else /* Not a % char */
|
|
|
|
*outptr++ = (char)c;
|
2012-08-09 07:15:18 +08:00
|
|
|
}
|
|
|
|
*outptr = EOFCHAR;
|
|
|
|
return decoded;
|
|
|
|
}
|
2017-03-09 08:01:10 +08:00
|
|
|
|
2021-09-28 08:36:33 +08:00
|
|
|
/* Deep clone a uri */
|
|
|
|
NCURI*
|
|
|
|
ncuriclone(NCURI* uri)
|
|
|
|
{
|
|
|
|
int stat = NC_NOERR;
|
|
|
|
NCURI* newuri = NULL;
|
|
|
|
|
|
|
|
/* make sure fragments and query are up to date */
|
|
|
|
if((stat=ensurefraglist(uri))) goto done;
|
|
|
|
if((stat=ensurequerylist(uri))) goto done;
|
|
|
|
|
|
|
|
if((newuri = (NCURI*)calloc(1,sizeof(NCURI)))==NULL)
|
|
|
|
{stat = NC_ENOMEM; goto done;}
|
|
|
|
*newuri = *uri; /* copy */
|
|
|
|
/* deep clone fields */
|
|
|
|
|
|
|
|
newuri->uri = nulldup(uri->uri);
|
|
|
|
newuri->protocol = nulldup(uri->protocol);
|
|
|
|
newuri->user = nulldup(uri->user);
|
|
|
|
newuri->password = nulldup(uri->password);
|
|
|
|
newuri->host = nulldup(uri->host);
|
|
|
|
newuri->port = nulldup(uri->port);
|
|
|
|
newuri->path = nulldup(uri->path);
|
|
|
|
newuri->query = nulldup(uri->query);
|
|
|
|
newuri->fragment = nulldup(uri->fragment);
|
|
|
|
/* make these be rebuilt */
|
|
|
|
newuri->fraglist = NULL;
|
|
|
|
newuri->querylist = NULL;
|
|
|
|
done:
|
|
|
|
return newuri;
|
|
|
|
}
|
|
|
|
|
2017-03-09 08:01:10 +08:00
|
|
|
static int
|
|
|
|
collectprefixparams(char* text, char** nextp)
|
|
|
|
{
|
2019-09-30 02:59:28 +08:00
|
|
|
int ret = NC_NOERR;
|
2017-03-09 08:01:10 +08:00
|
|
|
char* sp;
|
|
|
|
char* ep;
|
|
|
|
char* last;
|
|
|
|
|
2019-09-30 02:59:28 +08:00
|
|
|
if(text == NULL) return NC_EURL;
|
2017-03-09 08:01:10 +08:00
|
|
|
if(strlen(text) == 0) {
|
|
|
|
if(nextp) *nextp = text;
|
2019-09-30 02:59:28 +08:00
|
|
|
return NC_NOERR;
|
2017-03-09 08:01:10 +08:00
|
|
|
}
|
|
|
|
/* pass 1: locate last rbracket and nul term the prefix */
|
|
|
|
sp = text;
|
|
|
|
last = NULL;
|
|
|
|
for(;;) {
|
|
|
|
if(*sp != LBRACKET) {
|
|
|
|
if(nextp) *nextp = sp;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
/* use nclocate because \\ escapes might be present */
|
|
|
|
ep = nclocate(sp,RBRACKETSTR);
|
2019-09-30 02:59:28 +08:00
|
|
|
if(ep == NULL) {ret = NC_EINVAL; goto done;} /* malformed */
|
2017-03-09 08:01:10 +08:00
|
|
|
last = ep; /* save this position */
|
|
|
|
ep++; /* move past rbracket */
|
|
|
|
sp = ep;
|
2018-06-09 02:08:33 +08:00
|
|
|
}
|
2017-03-09 08:01:10 +08:00
|
|
|
/* nul terminate */
|
|
|
|
if(last != NULL)
|
|
|
|
terminate(last);
|
|
|
|
|
|
|
|
/* pass 2: convert [] to & */
|
|
|
|
sp = text;
|
|
|
|
for(;;) {
|
|
|
|
char* p; char* q;
|
|
|
|
/* by construction, here we are at an LBRACKET: compress it out */
|
2018-01-12 01:31:13 +08:00
|
|
|
for(p=sp,q=sp+1;(*p++=*q++);)
|
2018-06-09 02:08:33 +08:00
|
|
|
;
|
2017-03-09 08:01:10 +08:00
|
|
|
/* locate the next RRACKET */
|
|
|
|
ep = nclocate(sp,RBRACKETSTR);
|
|
|
|
if(ep == NULL) break;/* we are done */
|
|
|
|
/* convert the BRACKET to '&' */
|
|
|
|
*ep = '&';
|
|
|
|
ep++; /* move past rbracket */
|
|
|
|
sp = ep;
|
2018-06-09 02:08:33 +08:00
|
|
|
}
|
2017-03-09 08:01:10 +08:00
|
|
|
done:
|
2018-06-09 02:08:33 +08:00
|
|
|
return ret;
|
2017-03-09 08:01:10 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
2019-09-30 02:59:28 +08:00
|
|
|
parselist(const char* text, NClist* list)
|
2017-03-09 08:01:10 +08:00
|
|
|
{
|
2019-09-30 02:59:28 +08:00
|
|
|
int ret = NC_NOERR;
|
|
|
|
char* ptext = NULL;
|
2017-03-09 08:01:10 +08:00
|
|
|
char* p;
|
2019-09-30 02:59:28 +08:00
|
|
|
ptext = strdup(text); /* We need to modify */
|
2017-03-09 08:01:10 +08:00
|
|
|
p = ptext; /* start of next parameter */
|
|
|
|
for(;;) {
|
|
|
|
char* sp = p;
|
|
|
|
char* eq;
|
|
|
|
char* ep;
|
|
|
|
char* key;
|
|
|
|
char* value;
|
|
|
|
if(*p == EOFCHAR) break; /* we are done */
|
|
|
|
/* use nclocate because \\ escapes might be present */
|
|
|
|
ep = nclocate(sp,"&");
|
|
|
|
if(ep != NULL) {
|
|
|
|
terminate(ep); /* overwrite the trailing ampersand */
|
|
|
|
p = ep+1; /* next param */
|
2018-06-09 02:08:33 +08:00
|
|
|
}
|
2017-03-09 08:01:10 +08:00
|
|
|
/* split into key + value */
|
|
|
|
eq = strchr(sp,'=');
|
2018-06-09 02:08:33 +08:00
|
|
|
if(eq != NULL) { /* value is present */
|
2017-03-09 08:01:10 +08:00
|
|
|
terminate(eq); eq++;
|
|
|
|
key = strdup(sp);
|
|
|
|
value = strdup(eq);
|
|
|
|
} else {/* there is no value */
|
|
|
|
key = strdup(sp);
|
|
|
|
value = strdup("");
|
|
|
|
}
|
|
|
|
nclistpush(list,key);
|
|
|
|
nclistpush(list,value);
|
|
|
|
if(ep == NULL)
|
|
|
|
break;
|
|
|
|
}
|
2019-09-30 02:59:28 +08:00
|
|
|
nullfree(ptext);
|
2017-03-09 08:01:10 +08:00
|
|
|
return ret;
|
|
|
|
}
|
This PR adds EXPERIMENTAL support for accessing data in the
cloud using a variant of the Zarr protocol and storage
format. This enhancement is generically referred to as "NCZarr".
The data model supported by NCZarr is netcdf-4 minus the user-defined
types and the String type. In this sense it is similar to the CDF-5
data model.
More detailed information about enabling and using NCZarr is
described in the document NUG/nczarr.md and in a
[Unidata Developer's blog entry](https://www.unidata.ucar.edu/blogs/developer/en/entry/overview-of-zarr-support-in).
WARNING: this code has had limited testing, so do not use this version
for production work. Also, performance improvements are ongoing.
Note especially the following platform matrix of successful tests:
Platform | Build System | S3 support
------------------------------------
Linux+gcc | Automake | yes
Linux+gcc | CMake | yes
Visual Studio | CMake | no
Additionally, and as a consequence of the addition of NCZarr,
major changes have been made to the Filter API. NOTE: NCZarr
does not yet support filters, but these changes are enablers for
that support in the future. Note that it is possible
(probable?) that there will be some accidental reversions if the
changes here did not correctly mimic the existing filter testing.
In any case, previously filter ids and parameters were of type
unsigned int. In order to support the more general zarr filter
model, this was all converted to char*. The old HDF5-specific,
unsigned int operations are still supported but they are
wrappers around the new, char* based nc_filterx_XXX functions.
This entailed at least the following changes:
1. Added the files libdispatch/dfilterx.c and include/ncfilter.h
2. Some filterx utilities have been moved to libdispatch/daux.c
3. A new entry, "filter_actions" was added to the NCDispatch table
and the version bumped.
4. An overly complex set of structs was created to support funnelling
all of the filterx operations thru a single dispatch
"filter_actions" entry.
5. Move common code from libhdf5 to libsrc4 so that it is accessible
to nczarr.
Changes directly related to Zarr:
1. Modified CMakeLists.txt and configure.ac to support both C and C++
-- this is in support of S3 support via the aws-sdk libraries.
2. Define a size64_t type to support nczarr.
3. More reworking of libdispatch/dinfermodel.c to
support zarr and to regularize the structure of the fragments
section of a URL.
Changes not directly related to Zarr:
1. Make client-side filter registration be conditional, with default off.
2. Hack include/nc4internal.h to make some flags added by Ed be unique:
e.g. NC_CREAT, NC_INDEF, etc.
3. cleanup include/nchttp.h and libdispatch/dhttp.c.
4. Misc. changes to support compiling under Visual Studio including:
* Better testing under windows for dirent.h and opendir and closedir.
5. Misc. changes to the oc2 code to support various libcurl CURLOPT flags
and to centralize error reporting.
6. By default, suppress the vlen tests that have unfixed memory leaks; add option to enable them.
7. Make part of the nc_test/test_byterange.sh test be contingent on remotetest.unidata.ucar.edu being accessible.
Changes Left TO-DO:
1. fix provenance code, it is too HDF5 specific.
2020-06-29 08:02:47 +08:00
|
|
|
|
|
|
|
static int
|
|
|
|
unparselist(const char** vec, const char* prefix, int encode, char** svecp)
|
|
|
|
{
|
|
|
|
int stat = NC_NOERR;
|
|
|
|
NCbytes* buf = ncbytesnew();
|
|
|
|
const char** p;
|
|
|
|
int first = 1;
|
|
|
|
|
|
|
|
if(vec == NULL || vec[0] == NULL) goto done;
|
|
|
|
if(prefix != NULL) ncbytescat(buf,prefix);
|
|
|
|
for(p=vec;*p;p+=2,first=0) {
|
|
|
|
if(!first) ncbytescat(buf,"&");
|
|
|
|
if(encode) {
|
|
|
|
char* encoded = ncuriencodeonly(p[0],queryallow);
|
|
|
|
ncbytescat(buf,encoded);
|
|
|
|
nullfree(encoded);
|
|
|
|
} else
|
|
|
|
ncbytescat(buf,p[0]);
|
|
|
|
if(p[1] != NULL && strlen(p[1]) > 0) {
|
|
|
|
ncbytescat(buf,"=");
|
|
|
|
if(encode) {
|
|
|
|
char* encoded = ncuriencodeonly(p[1],queryallow);
|
|
|
|
ncbytescat(buf,encoded);
|
|
|
|
nullfree(encoded);
|
|
|
|
} else
|
|
|
|
ncbytescat(buf,p[1]);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if(svecp) {*svecp = ncbytesextract(buf);}
|
|
|
|
done:
|
|
|
|
ncbytesfree(buf);
|
|
|
|
return stat;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
ensurefraglist(NCURI* uri)
|
|
|
|
{
|
|
|
|
int stat = NC_NOERR;
|
|
|
|
int nofrag = 0;
|
|
|
|
int nolist = 0;
|
|
|
|
NClist* fraglist = NULL;
|
|
|
|
NCbytes* frag = NULL;
|
|
|
|
|
|
|
|
if(uri->fragment == NULL || strlen(uri->fragment) == 0)
|
|
|
|
{nullfree(uri->fragment); uri->fragment = NULL; nofrag=1;}
|
|
|
|
if(uri->fraglist == NULL) nolist = 1;
|
|
|
|
if(nolist && !nofrag) {
|
|
|
|
fraglist = nclistnew();
|
|
|
|
if((stat = parselist(uri->fragment,fraglist))) goto done;
|
|
|
|
removedups(fraglist);
|
|
|
|
uri->fraglist = nclistextract(fraglist);
|
|
|
|
} else if(!nolist && nofrag) {
|
|
|
|
/* Create the fragment string from fraglist */
|
|
|
|
frag = ncbytesnew();
|
|
|
|
buildlist((const char**)uri->fraglist,1,frag);
|
|
|
|
uri->fragment = ncbytesextract(frag);
|
|
|
|
}
|
|
|
|
|
|
|
|
done:
|
|
|
|
ncbytesfree(frag);
|
|
|
|
nclistfreeall(fraglist);
|
|
|
|
return stat;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
ensurequerylist(NCURI* uri)
|
|
|
|
{
|
|
|
|
int stat = NC_NOERR;
|
|
|
|
int noquery = 0;
|
|
|
|
int nolist = 0;
|
|
|
|
NClist* querylist = NULL;
|
|
|
|
NCbytes* query = NULL;
|
|
|
|
|
|
|
|
if(uri->query == NULL || strlen(uri->query) == 0)
|
|
|
|
{nullfree(uri->query); uri->query = NULL; noquery=1;}
|
|
|
|
if(uri->querylist == NULL) nolist = 1;
|
|
|
|
if(nolist && !noquery) {
|
|
|
|
querylist = nclistnew();
|
|
|
|
if((stat = parselist(uri->query,querylist))) goto done;
|
|
|
|
removedups(querylist);
|
|
|
|
uri->querylist = nclistextract(querylist);
|
|
|
|
} else if(!nolist && noquery) {
|
|
|
|
/* Create the query string from querylist */
|
|
|
|
query = ncbytesnew();
|
|
|
|
buildlist((const char**)uri->querylist,1,query);
|
|
|
|
uri->query = ncbytesextract(query);
|
|
|
|
}
|
|
|
|
|
|
|
|
done:
|
|
|
|
ncbytesfree(query);
|
|
|
|
nclistfreeall(querylist);
|
|
|
|
return stat;
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
removedups(NClist* list)
|
|
|
|
{
|
|
|
|
int i,j;
|
|
|
|
|
|
|
|
if(nclistlength(list) <= 2) return; /* need at least 2 pairs */
|
|
|
|
for(i=0;i<nclistlength(list);i+=2) {
|
|
|
|
/* look for dups for this entry */
|
|
|
|
for(j=nclistlength(list)-2;j>i;j-=2) {
|
|
|
|
if(strcasecmp(nclistget(list,i),nclistget(list,j))==0
|
|
|
|
&& strcasecmp(nclistget(list,i+1),nclistget(list,j+1))) {
|
|
|
|
nclistremove(list,j+1); nclistremove(list,j);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
/* NULL terminate the list */
|
|
|
|
nclistpush(list,NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
buildlist(const char** list, int encode, NCbytes* buf)
|
|
|
|
{
|
|
|
|
const char** p;
|
|
|
|
int first = 1;
|
|
|
|
for(p=list;*p;p+=2,first=0) {
|
|
|
|
if(!first) ncbytescat(buf,"&");
|
|
|
|
ncbytescat(buf,p[0]);
|
|
|
|
if(p[1] != NULL && strlen(p[1]) > 0) {
|
|
|
|
ncbytescat(buf,"=");
|
|
|
|
if(encode) {
|
|
|
|
char* encoded = ncuriencodeonly(p[1],queryallow);
|
|
|
|
ncbytescat(buf,encoded);
|
|
|
|
nullfree(encoded);
|
|
|
|
} else
|
|
|
|
ncbytescat(buf,p[1]);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
extendenvv(char*** envvp, int amount, int* oldlenp)
|
|
|
|
{
|
|
|
|
char** envv = *envvp;
|
|
|
|
char** p;
|
|
|
|
int len;
|
|
|
|
for(len=0,p=envv;*p;p++) len++;
|
|
|
|
*oldlenp = len;
|
|
|
|
if((envv = (char**)malloc((amount+len+1)*sizeof(char*)))==NULL) return NC_ENOMEM;
|
|
|
|
memcpy(envv,*envvp,sizeof(char*)*len);
|
|
|
|
envv[len] = NULL;
|
|
|
|
nullfree(*envvp);
|
|
|
|
*envvp = envv; envv = NULL;
|
|
|
|
return NC_NOERR;
|
|
|
|
}
|