mirror of
https://github.com/Unidata/netcdf-c.git
synced 2024-12-27 08:49:16 +08:00
f6e25b695e
re: https://github.com/Unidata/netcdf-c/issues/2117 re: https://github.com/Unidata/netcdf-c/issues/2119 * Modify libsrc to allow byte-range reading of netcdf-3 files in private S3 buckets; this required using the aws sdk. Also add a test case. * The aws sdk can sometimes cause problems if the Aws::ShutdownAPI function is not called. So add optional atexit() support to ensure it is called. This is disabled for Windows. * Add documentation to nczarr.md on how to build and use the aws sdk under windows. Currently it builds, but testing fails. * Switch testing from stratus to the Unidata bucket on S3. * Improve support for the s3: url protocol. * Add an S3-specific utility code file: ds3util.c * Modify NC_infermodel to attempt to read the magic number of byte-ranged files in S3. ## Misc. * Move and rename the core S3 SDK wrapper code (libnczarr/zs3sdk.cpp) to libdispatch since it is now used in libsrc as well as libnczarr. * Add calls to nc_finalize in the utilities in case atexit is disabled. * Add header only json parser to the distribution rather than as a built source.
251 lines
6.9 KiB
C
251 lines
6.9 KiB
C
/*********************************************************************
|
|
* Copyright 2018, UCAR/Unidata
|
|
* See netcdf/COPYRIGHT file for copying and redistribution conditions.
|
|
*********************************************************************/
|
|
|
|
#include "config.h"
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <stdio.h>
|
|
#include <assert.h>
|
|
#ifdef HAVE_UNISTD_H
|
|
#include <unistd.h>
|
|
#endif
|
|
#ifdef HAVE_SYS_STAT_H
|
|
#include <sys/stat.h>
|
|
#endif
|
|
#ifdef HAVE_FCNTL_H
|
|
#include <fcntl.h>
|
|
#endif
|
|
#ifdef _MSC_VER
|
|
#include <io.h>
|
|
#endif
|
|
|
|
#include "netcdf.h"
|
|
#include "ncuri.h"
|
|
#include "ncrc.h"
|
|
|
|
|
|
#undef AWSDEBUG
|
|
|
|
#define AWSHOST ".amazonaws.com"
|
|
|
|
/* Tags for the URL styles distinguished in NC_s3urlrebuild:
   virtual-host style, path style, s3: protocol, and any other host.
   NOTE(review): nothing in the visible part of this file references
   this enum -- confirm it is still needed. */
enum URLFORMAT {UF_NONE=0, UF_VIRTUAL=1, UF_PATH=2, UF_S3=3, UF_OTHER=4};
|
|
|
|
/* Forward */
|
|
static int endswith(const char* s, const char* suffix);
|
|
|
|
/**************************************************/
|
|
/* Generic S3 Utilities */
|
|
|
|
/*
|
|
Rebuild an S3 url into a canonical path-style url.
|
|
If region is not in the host, then use specified region
|
|
if provided, otherwise us-east-1.
|
|
@param url (in) the current url
|
|
@param region (in) region to use if needed; NULL => us-east-1
|
|
(out) region from url or the input region
|
|
@param pathurlp (out) the resulting pathified url string
|
|
@param bucketp (out) the bucket from the url
|
|
*/
|
|
|
|
int
|
|
NC_s3urlrebuild(NCURI* url, NCURI** newurlp, char** bucketp, char** outregionp)
|
|
{
|
|
int i,stat = NC_NOERR;
|
|
NClist* hostsegments = NULL;
|
|
NClist* pathsegments = NULL;
|
|
NCbytes* buf = ncbytesnew();
|
|
NCURI* newurl = NULL;
|
|
char* bucket = NULL;
|
|
char* host = NULL;
|
|
char* path = NULL;
|
|
char* region = NULL;
|
|
|
|
if(url == NULL)
|
|
{stat = NC_EURL; goto done;}
|
|
|
|
/* Parse the hostname */
|
|
hostsegments = nclistnew();
|
|
/* split the hostname by "." */
|
|
if((stat = NC_split_delim(url->host,'.',hostsegments))) goto done;
|
|
|
|
/* Parse the path*/
|
|
pathsegments = nclistnew();
|
|
/* split the path by "/" */
|
|
if((stat = NC_split_delim(url->path,'/',pathsegments))) goto done;
|
|
|
|
/* Distinguish path-style from virtual-host style from s3: and from other.
|
|
Virtual: https://bucket-name.s3.Region.amazonaws.com/<path>
|
|
Path: https://s3.Region.amazonaws.com/bucket-name/<path>
|
|
S3: s3://bucket-name/<path>
|
|
Other: https://<host>/bucketname/<path>
|
|
*/
|
|
if(url->host == NULL || strlen(url->host) == 0)
|
|
{stat = NC_EURL; goto done;}
|
|
if(strcmp(url->protocol,"s3")==0 && nclistlength(hostsegments)==1) {
|
|
bucket = strdup(url->host);
|
|
region = NULL; /* unknown at this point */
|
|
} else if(endswith(url->host,AWSHOST)) { /* Virtual or path */
|
|
switch (nclistlength(hostsegments)) {
|
|
default: stat = NC_EURL; goto done;
|
|
case 4:
|
|
if(strcasecmp(nclistget(hostsegments,0),"s3")!=0)
|
|
{stat = NC_EURL; goto done;}
|
|
region = strdup(nclistget(hostsegments,1));
|
|
if(nclistlength(pathsegments) > 0)
|
|
bucket = nclistremove(pathsegments,0);
|
|
break;
|
|
case 5:
|
|
if(strcasecmp(nclistget(hostsegments,1),"s3")!=0)
|
|
{stat = NC_EURL; goto done;}
|
|
region = strdup(nclistget(hostsegments,2));
|
|
bucket = strdup(nclistget(hostsegments,0));
|
|
break;
|
|
}
|
|
} else {
|
|
if((host = strdup(url->host))==NULL)
|
|
{stat = NC_ENOMEM; goto done;}
|
|
/* region is unknown */
|
|
region = NULL;
|
|
/* bucket is assumed to be start of the path */
|
|
if(nclistlength(pathsegments) > 0)
|
|
bucket = nclistremove(pathsegments,0);
|
|
}
|
|
/* If region is null, use default */
|
|
if(region == NULL) {
|
|
const char* region0 = NULL;
|
|
/* Get default region */
|
|
if((stat = NC_getdefaults3region(url,®ion0))) goto done;
|
|
region = strdup(region0);
|
|
}
|
|
/* Construct the revised host */
|
|
ncbytescat(buf,"s3.");
|
|
ncbytescat(buf,region);
|
|
ncbytescat(buf,AWSHOST);
|
|
host = ncbytesextract(buf);
|
|
|
|
/* Construct the revised path */
|
|
ncbytesclear(buf);
|
|
ncbytescat(buf,"/");
|
|
if(bucket == NULL)
|
|
{stat = NC_EURL; goto done;}
|
|
ncbytescat(buf,bucket);
|
|
for(i=0;i<nclistlength(pathsegments);i++) {
|
|
ncbytescat(buf,"/");
|
|
ncbytescat(buf,nclistget(pathsegments,i));
|
|
}
|
|
path = ncbytesextract(buf);
|
|
/* complete the new url */
|
|
if((newurl=ncuriclone(url))==NULL) {stat = NC_ENOMEM; goto done;}
|
|
ncurisetprotocol(newurl,"https");
|
|
ncurisethost(newurl,host);
|
|
ncurisetpath(newurl,path);
|
|
/* return various items */
|
|
#ifdef AWSDEBUG
|
|
{
|
|
char* s = ncuribuild(newurl,NULL,NULL,NCURIALL);
|
|
fprintf(stderr,">>> NC_s3urlrebuild: final=%s bucket=%s region=%s\n",s,bucket,region);
|
|
nullfree(s);
|
|
}
|
|
#endif
|
|
if(newurlp) {*newurlp = newurl; newurl = NULL;}
|
|
if(bucketp) {*bucketp = bucket; bucket = NULL;}
|
|
if(outregionp) {*outregionp = region; region = NULL;}
|
|
|
|
done:
|
|
nullfree(region);
|
|
nullfree(bucket)
|
|
nullfree(host)
|
|
nullfree(path)
|
|
ncurifree(newurl);
|
|
ncbytesfree(buf);
|
|
nclistfreeall(hostsegments);
|
|
nclistfreeall(pathsegments);
|
|
return stat;
|
|
}
|
|
|
|
/*
Test whether a string ends with a given suffix.
@param s (in) string to examine; may be NULL
@param suffix (in) suffix to look for; may be NULL
@return 1 if both arguments are non-NULL and s ends with suffix
        (an empty suffix always matches), 0 otherwise
*/
static int
endswith(const char* s, const char* suffix)
{
    size_t ls, lsf;

    if(s == NULL || suffix == NULL) return 0;
    ls = strlen(s);
    lsf = strlen(suffix);
    /* Compare lengths first: the unsigned subtraction below must not wrap */
    if(lsf > ls) return 0;
    return (memcmp(s + (ls - lsf), suffix, lsf) == 0) ? 1 : 0;
}
|
|
|
|
/**************************************************/
|
|
/* S3 utilities */
|
|
|
|
EXTERNL int
|
|
NC_s3urlprocess(NCURI* url, NCS3INFO* s3)
|
|
{
|
|
int stat = NC_NOERR;
|
|
NCURI* url2 = NULL;
|
|
NClist* pathsegments = NULL;
|
|
const char* profile0 = NULL;
|
|
|
|
if(url == NULL || s3 == NULL)
|
|
{stat = NC_EURL; goto done;}
|
|
/* Get current profile */
|
|
if((stat = NC_getactives3profile(url,&profile0))) goto done;
|
|
if(profile0 == NULL) profile0 = "none";
|
|
s3->profile = strdup(profile0);
|
|
|
|
/* Rebuild the URL to path format and get a usable region*/
|
|
if((stat = NC_s3urlrebuild(url,&url2,&s3->bucket,&s3->region))) goto done;
|
|
s3->host = strdup(url2->host);
|
|
/* construct the rootkey minus the leading bucket */
|
|
pathsegments = nclistnew();
|
|
if((stat = NC_split_delim(url2->path,'/',pathsegments))) goto done;
|
|
if(nclistlength(pathsegments) > 0) {
|
|
char* seg = nclistremove(pathsegments,0);
|
|
nullfree(seg);
|
|
}
|
|
if((stat = NC_join(pathsegments,&s3->rootkey))) goto done;
|
|
|
|
done:
|
|
ncurifree(url2);
|
|
nclistfreeall(pathsegments);
|
|
return stat;
|
|
}
|
|
|
|
int
|
|
NC_s3clear(NCS3INFO* s3)
|
|
{
|
|
if(s3) {
|
|
nullfree(s3->host); s3->host = NULL;
|
|
nullfree(s3->region); s3->region = NULL;
|
|
nullfree(s3->bucket); s3->bucket = NULL;
|
|
nullfree(s3->rootkey); s3->rootkey = NULL;
|
|
nullfree(s3->profile); s3->profile = NULL;
|
|
}
|
|
return NC_NOERR;
|
|
}
|
|
|
|
/*
|
|
Check if a url has indicators that signal an S3 url.
|
|
*/
|
|
|
|
int
|
|
NC_iss3(NCURI* uri)
|
|
{
|
|
int iss3 = 0;
|
|
|
|
if(uri == NULL) goto done; /* not a uri */
|
|
/* is the protocol "s3"? */
|
|
if(strcasecmp(uri->protocol,"s3")==0) {iss3 = 1; goto done;}
|
|
/* Is "s3" in the mode list? */
|
|
if(NC_testmode(uri,"s3")) {iss3 = 1; goto done;}
|
|
/* Last chance; see if host looks s3'y */
|
|
if(endswith(uri->host,AWSHOST)) {iss3 = 1; goto done;}
|
|
|
|
done:
|
|
return iss3;
|
|
}
|
|
|