netcdf-c/libdispatch/dutil.c
Dennis Heimbigner f6e25b695e Fix additional S3 support issues
re: https://github.com/Unidata/netcdf-c/issues/2117
re: https://github.com/Unidata/netcdf-c/issues/2119

* Modify libsrc to allow byte-range reading of netcdf-3 files in private S3 buckets; this required using the aws sdk. Also add a test case.
* The aws sdk can sometimes cause problems if the Awd::ShutdownAPI function is not called. So at optional atexit() support to ensure it is called. This is disabled for Windows.
* Add documentation to nczarr.md on how to build and use the aws sdk under windows. Currently it builds, but testing fails.
* Switch testing from stratus to the Unidata bucket on S3.
* Improve support for the s3: url protocol.
* Add a s3 specific utility code file: ds3util.c
* Modify NC_infermodel to attempt to read the magic number of byte-ranged files in S3.

## Misc.

* Move and rename the core S3 SDK wrapper code (libnczarr/zs3sdk.cpp) to libdispatch since it now used in libsrc as well as libnczarr.
* Add calls to nc_finalize in the utilities in case atexit is disabled.
* Add header only json parser to the distribution rather than as a built source.
2021-10-29 20:06:37 -06:00

428 lines
8.9 KiB
C

/*********************************************************************
* Copyright 2018, UCAR/Unidata
* See netcdf/COPYRIGHT file for copying and redistribution conditions.
*********************************************************************/
#include "config.h"
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <assert.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef HAVE_SYS_STAT_H
#include <sys/stat.h>
#endif
#ifdef HAVE_FCNTL_H
#include <fcntl.h>
#endif
#ifdef _MSC_VER
#include <io.h>
#endif
#include "netcdf.h"
#include "ncuri.h"
#include "ncbytes.h"
#include "nclist.h"
#include "nclog.h"
#include "ncrc.h"
#include "ncpathmgr.h"
#define NC_MAX_PATH 4096
/**************************************************/
/**
* Provide a hidden interface to allow utilities
* to check if a given path name is really an ncdap4 url.
* If no, return null, else return basename of the url
* minus any extension.
*/
int
NC__testurl(const char* path, char** basenamep)
{
NCURI* uri;
int ok = NC_NOERR;
if(ncuriparse(path,&uri))
ok = NC_EURL;
else {
char* slash = (uri->path == NULL ? NULL : strrchr(uri->path, '/'));
char* dot;
if(slash == NULL) slash = (char*)path; else slash++;
slash = nulldup(slash);
if(slash == NULL)
dot = NULL;
else
dot = strrchr(slash, '.');
if(dot != NULL && dot != slash) *dot = '\0';
if(basenamep)
*basenamep=slash;
else if(slash)
free(slash);
}
ncurifree(uri);
return ok;
}
/* Return 1 if this machine is little endian */
int
NC_isLittleEndian(void)
{
union {
unsigned char bytes[SIZEOF_INT];
int i;
} u;
u.i = 1;
return (u.bytes[0] == 1 ? 1 : 0);
}
char*
NC_backslashEscape(const char* s)
{
const char* p;
char* q;
size_t len;
char* escaped = NULL;
len = strlen(s);
escaped = (char*)malloc(1+(2*len)); /* max is everychar is escaped */
if(escaped == NULL) return NULL;
for(p=s,q=escaped;*p;p++) {
char c = *p;
switch (c) {
case '\\':
case '/':
case '.':
case '@':
*q++ = '\\'; *q++ = '\\';
break;
default: *q++ = c; break;
}
}
*q = '\0';
return escaped;
}
char*
NC_backslashUnescape(const char* esc)
{
size_t len;
char* s;
const char* p;
char* q;
if(esc == NULL) return NULL;
len = strlen(esc);
s = (char*)malloc(len+1);
if(s == NULL) return NULL;
for(p=esc,q=s;*p;) {
switch (*p) {
case '\\':
p++;
/* fall thru */
default: *q++ = *p++; break;
}
}
*q = '\0';
return s;
}
char*
NC_entityescape(const char* s)
{
const char* p;
char* q;
size_t len;
char* escaped = NULL;
const char* entity;
len = strlen(s);
escaped = (char*)malloc(1+(6*len)); /* 6 = |&apos;| */
if(escaped == NULL) return NULL;
for(p=s,q=escaped;*p;p++) {
char c = *p;
switch (c) {
case '&': entity = "&amp;"; break;
case '<': entity = "&lt;"; break;
case '>': entity = "&gt;"; break;
case '"': entity = "&quot;"; break;
case '\'': entity = "&apos;"; break;
default : entity = NULL; break;
}
if(entity == NULL)
*q++ = c;
else {
len = strlen(entity);
memcpy(q,entity,len);
q+=len;
}
}
*q = '\0';
return escaped;
}
char*
/*
Depending on the platform, the shell will sometimes
pass an escaped octotherpe character without removing
the backslash. So this function is appropriate to be called
on possible url paths to unescape such cases. See e.g. ncgen.
*/
NC_shellUnescape(const char* esc)
{
size_t len;
char* s;
const char* p;
char* q;
if(esc == NULL) return NULL;
len = strlen(esc);
s = (char*)malloc(len+1);
if(s == NULL) return NULL;
for(p=esc,q=s;*p;) {
switch (*p) {
case '\\':
if(p[1] == '#')
p++;
/* fall thru */
default: *q++ = *p++; break;
}
}
*q = '\0';
return s;
}
/**
Wrap mktmp and return the generated path,
or null if failed.
Base is the base file path. XXXXX is appended
to allow mktmp add its unique id.
Return the generated path.
*/
char*
NC_mktmp(const char* base)
{
int fd = -1;
char* tmp = NULL;
size_t len;
len = strlen(base)+6+1;
if((tmp = (char*)malloc(len))==NULL)
goto done;
strncpy(tmp,base,len);
strlcat(tmp, "XXXXXX", len);
fd = NCmkstemp(tmp);
if(fd < 0) {
nclog(NCLOGERR, "Could not create temp file: %s",tmp);
goto done;
}
done:
if(fd >= 0) close(fd);
return tmp;
}
int
NC_readfile(const char* filename, NCbytes* content)
{
int ret = NC_NOERR;
FILE* stream = NULL;
char part[1024];
#ifdef _WIN32
stream = NCfopen(filename,"rb");
#else
stream = NCfopen(filename,"r");
#endif
if(stream == NULL) {ret=errno; goto done;}
for(;;) {
size_t count = fread(part, 1, sizeof(part), stream);
if(count <= 0) break;
ncbytesappendn(content,part,count);
if(ferror(stream)) {ret = NC_EIO; goto done;}
if(feof(stream)) break;
}
ncbytesnull(content);
done:
if(stream) fclose(stream);
return ret;
}
int
NC_writefile(const char* filename, size_t size, void* content)
{
int ret = NC_NOERR;
FILE* stream = NULL;
void* p;
size_t remain;
#ifdef _WIN32
stream = NCfopen(filename,"wb");
#else
stream = NCfopen(filename,"w");
#endif
if(stream == NULL) {ret=errno; goto done;}
p = content;
remain = size;
while(remain > 0) {
size_t written = fwrite(p, 1, remain, stream);
if(ferror(stream)) {ret = NC_EIO; goto done;}
if(feof(stream)) break;
remain -= written;
}
done:
if(stream) fclose(stream);
return ret;
}
/*
Parse a path as a url and extract the modelist.
If the path is not a URL, then return a NULL list.
If a URL, but modelist is empty or does not exist,
then return empty list.
*/
int
NC_getmodelist(const char* modestr, NClist** modelistp)
{
int stat=NC_NOERR;
NClist* modelist = NULL;
modelist = nclistnew();
if(modestr == NULL || strlen(modestr) == 0) goto done;
/* Parse the mode string at the commas or EOL */
if((stat = NC_split_delim(modestr,',',modelist))) goto done;
done:
if(stat == NC_NOERR) {
if(modelistp) {*modelistp = modelist; modelist = NULL;}
} else
nclistfree(modelist);
return stat;
}
/*
Check "mode=" list for a path and return 1 if present, 0 otherwise.
*/
int
NC_testpathmode(const char* path, const char* tag)
{
int found = 0;
NCURI* uri = NULL;
ncuriparse(path,&uri);
if(uri != NULL) {
found = NC_testmode(uri,tag);
ncurifree(uri);
}
return found;
}
/*
Check "mode=" list for a url and return 1 if present, 0 otherwise.
*/
int
NC_testmode(NCURI* uri, const char* tag)
{
int stat = NC_NOERR;
int found = 0;
int i;
const char* modestr = NULL;
NClist* modelist = NULL;
modestr = ncurifragmentlookup(uri,"mode");
if(modestr == NULL) goto done;
/* Parse mode str */
if((stat = NC_getmodelist(modestr,&modelist))) goto done;
/* Search for tag */
for(i=0;i<nclistlength(modelist);i++) {
const char* mode = (const char*)nclistget(modelist,i);
if(strcasecmp(mode,tag)==0) {found = 1; break;}
}
done:
nclistfreeall(modelist);
return found;
}
#ifdef __APPLE__
int isinf(double x)
{
union { unsigned long long u; double f; } ieee754;
ieee754.f = x;
return ( (unsigned)(ieee754.u >> 32) & 0x7fffffff ) == 0x7ff00000 &&
( (unsigned)ieee754.u == 0 );
}
int isnan(double x)
{
union { unsigned long long u; double f; } ieee754;
ieee754.f = x;
return ( (unsigned)(ieee754.u >> 32) & 0x7fffffff ) +
( (unsigned)ieee754.u != 0 ) > 0x7ff00000;
}
#endif /*APPLE*/
int
NC_split_delim(const char* arg, char delim, NClist* segments)
{
int stat = NC_NOERR;
const char* p = NULL;
const char* q = NULL;
ptrdiff_t len = 0;
char* seg = NULL;
if(arg == NULL || strlen(arg)==0 || segments == NULL)
goto done;
p = arg;
if(p[0] == delim) p++;
for(;*p;) {
q = strchr(p,delim);
if(q==NULL)
q = p + strlen(p); /* point to trailing nul */
len = (q - p);
if(len == 0)
{stat = NC_EURL; goto done;}
if((seg = malloc(len+1)) == NULL)
{stat = NC_ENOMEM; goto done;}
memcpy(seg,p,len);
seg[len] = '\0';
nclistpush(segments,seg);
seg = NULL; /* avoid mem errors */
if(*q) p = q+1; else p = q;
}
done:
nullfree(seg);
return stat;
}
/* concat the the segments with each segment preceded by '/' */
int
NC_join(NClist* segments, char** pathp)
{
int stat = NC_NOERR;
int i;
NCbytes* buf = NULL;
if(segments == NULL)
{stat = NC_EINVAL; goto done;}
if((buf = ncbytesnew())==NULL)
{stat = NC_ENOMEM; goto done;}
if(nclistlength(segments) == 0)
ncbytescat(buf,"/");
else for(i=0;i<nclistlength(segments);i++) {
const char* seg = nclistget(segments,i);
if(seg[0] != '/')
ncbytescat(buf,"/");
ncbytescat(buf,seg);
}
done:
if(!stat) {
if(pathp) *pathp = ncbytesextract(buf);
}
ncbytesfree(buf);
return stat;
}