netcdf-c/libnczarr/zmap.c
Dennis Heimbigner e7d5f24078 Add zip file support
The primary change is to support the use of a zip file as a
storage format. Simultaneously the .nz4 support is made obsolete.

Use of zip requires the libzip support library, so a number of
changes to the build files (Makefile.am, CMakeLists.txt) are
necessary to locate and incorporate libzip.  The nczarr_tests
tests are also changed to add zip testing.

Other changes:
* Make sure distcheck leaves no files around.
* Add some functions to netcdf_aux to export some functions of libnetcdf.
* Add a new error NC_EFOUND as the complement of NC_EEMPTY.
* Add tracing support to nclog and use it in libnczarr.
* Modify the zmap interface to support the writeonce semantics of zip.
* Create a new s3util.c to support a variety of S3 auxiliary functions.
* EXTERNL'ize a number of functions so they can be used in s3util.
* Add support for the S3 ListObjects CommonPrefixes mechanism
  to improve search.
* Add experimental support for running nczarr X s3 tests against
  the actual Amazon S3 cloud.
2021-01-28 20:11:01 -07:00

517 lines
11 KiB
C

/*
* Copyright 2018, University Corporation for Atmospheric Research
* See netcdf/COPYRIGHT file for copying and redistribution conditions.
*/
#include "zincludes.h"
#include <stdarg.h>
#include "ncpathmgr.h"
/**************************************************/
/* Import the current implementations */
/* Each NCZMAP_DS_API object below is only declared here (extern).
   zmap_file is always declared; the others are declared only when the
   corresponding build macro is defined. */
extern NCZMAP_DS_API zmap_file;
#ifdef USE_HDF5
/* NOTE(review): zmap_nz4 is declared but not referenced by any of the
   dispatch code visible in this file -- confirm whether it is still needed. */
extern NCZMAP_DS_API zmap_nz4;
#endif
#ifdef ENABLE_NCZARR_ZIP
extern NCZMAP_DS_API zmap_zip;
#endif
#ifdef ENABLE_S3_SDK
extern NCZMAP_DS_API zmap_s3sdk;
#endif
/**************************************************/
/*
Report the properties value published by the map implementation
selected by impl.
Returns NCZM_UNIMPLEMENTED when impl does not name an implementation
that was compiled into this build.
*/
NCZM_PROPERTIES
nczmap_properties(NCZM_IMPL impl)
{
    if(impl == NCZM_FILE)
        return zmap_file.properties;
#ifdef ENABLE_NCZARR_ZIP
    if(impl == NCZM_ZIP)
        return zmap_zip.properties;
#endif
#ifdef ENABLE_S3_SDK
    if(impl == NCZM_S3)
        return zmap_s3sdk.properties;
#endif
    /* Unknown implementation, or one excluded by the build options */
    return NCZM_UNIMPLEMENTED;
}
/*
Create a new map using the implementation selected by impl.

path       -- location handed to the implementation; must be non-NULL and non-empty
mode       -- passed through unchanged to the implementation's create function
flags      -- passed through unchanged to the implementation's create function
parameters -- passed through unchanged to the implementation's create function
mapp       -- if non-NULL, receives the new map on success and NULL on any failure

Returns NC_EINVAL for a NULL or empty path, NC_ENOTBUILT when impl is not
available in this build, otherwise the status of the implementation's create.

NOTE(review): when mapp is NULL a successfully created map is not handed
to anyone; this matches the previous behavior.
*/
int
nczmap_create(NCZM_IMPL impl, const char *path, int mode, size64_t flags, void* parameters, NCZMAP** mapp)
{
    int stat = NC_NOERR;
    NCZMAP* map = NULL;

    /* Clear the output first so every failure path leaves it defined */
    if(mapp) *mapp = NULL;

    if(path == NULL || strlen(path) == 0)
        {stat = NC_EINVAL; goto done;}

    switch (impl) {
    case NCZM_FILE:
        stat = zmap_file.create(path, mode, flags, parameters, &map);
        if(stat) goto done;
        break;
#ifdef ENABLE_NCZARR_ZIP
    case NCZM_ZIP:
        stat = zmap_zip.create(path, mode, flags, parameters, &map);
        if(stat) goto done;
        break;
#endif
#ifdef ENABLE_S3_SDK
    case NCZM_S3:
        stat = zmap_s3sdk.create(path, mode, flags, parameters, &map);
        if(stat) goto done;
        break;
#endif
    default:
        {stat = NC_ENOTBUILT; goto done;}
    }
    if(mapp) *mapp = map;
done:
    return THROW(stat);
}
/*
Open an existing map using the implementation selected by impl.

path       -- location handed to the implementation; must be non-NULL and non-empty
mode       -- passed through unchanged to the implementation's open function
flags      -- passed through unchanged to the implementation's open function
parameters -- passed through unchanged to the implementation's open function
mapp       -- if non-NULL, receives the opened map on success and NULL on any failure

Returns NC_EINVAL for a NULL or empty path, NC_ENOTBUILT when impl is not
available in this build, otherwise the status of the implementation's open.
*/
int
nczmap_open(NCZM_IMPL impl, const char *path, int mode, size64_t flags, void* parameters, NCZMAP** mapp)
{
    int stat = NC_NOERR;
    NCZMAP* map = NULL;

    /* Clear the output first so every failure path leaves it defined */
    if(mapp) *mapp = NULL;

    if(path == NULL || strlen(path) == 0)
        {stat = NC_EINVAL; goto done;}

    switch (impl) {
    case NCZM_FILE:
        stat = zmap_file.open(path, mode, flags, parameters, &map);
        if(stat) goto done;
        break;
#ifdef ENABLE_NCZARR_ZIP
    case NCZM_ZIP:
        stat = zmap_zip.open(path, mode, flags, parameters, &map);
        if(stat) goto done;
        break;
#endif
#ifdef ENABLE_S3_SDK
    case NCZM_S3:
        stat = zmap_s3sdk.open(path, mode, flags, parameters, &map);
        if(stat) goto done;
        break;
#endif
    default:
        {stat = NC_ENOTBUILT; goto done;}
    }
    /* Only a fully successful open publishes the map */
    if(mapp) *mapp = map;
done:
    return THROW(stat);
}
/**************************************************/
/* API Wrapper */
/*
Close a map by calling the close function of its implementation.
The delete argument is forwarded unchanged.
A NULL map, or a map whose api pointer is NULL, is ignored and
NC_NOERR is reported.
*/
int
nczmap_close(NCZMAP* map, int delete)
{
    int stat = NC_NOERR;

    if(map == NULL || map->api == NULL)
        goto done; /* nothing to dispatch to */
    stat = map->api->close(map,delete);
done:
    return THROW(stat);
}
/*
Dispatch an existence query for key to the map's implementation and
return its status unchanged.
map and map->api are dereferenced without a NULL check.
*/
int
nczmap_exists(NCZMAP* map, const char* key)
{
    int stat;

    stat = map->api->exists(map, key);
    return stat;
}
/*
Dispatch a length query for key to the map's implementation; lenp is
forwarded as given and the implementation's status is returned unchanged.
map and map->api are dereferenced without a NULL check.
*/
int
nczmap_len(NCZMAP* map, const char* key, size64_t* lenp)
{
    int stat;

    stat = map->api->len(map, key, lenp);
    return stat;
}
/*
Dispatch a read of key to the map's implementation, forwarding start,
count and the content buffer as given; the implementation's status is
returned unchanged.
map and map->api are dereferenced without a NULL check.
*/
int
nczmap_read(NCZMAP* map, const char* key, size64_t start, size64_t count, void* content)
{
    int stat;

    stat = map->api->read(map, key, start, count, content);
    return stat;
}
/*
Dispatch a write of key to the map's implementation, forwarding start,
count and the content buffer as given; the implementation's status is
returned unchanged.
map and map->api are dereferenced without a NULL check.
*/
int
nczmap_write(NCZMAP* map, const char* key, size64_t start, size64_t count, const void* content)
{
    int stat;

    stat = map->api->write(map, key, start, count, content);
    return stat;
}
/*
Dispatch a search for prefix to the map's implementation, forwarding the
matches list as given; the implementation's status is returned unchanged.
map and map->api are dereferenced without a NULL check.
*/
int
nczmap_search(NCZMAP* map, const char* prefix, NClist* matches)
{
    int stat;

    stat = map->api->search(map, prefix, matches);
    return stat;
}
/**************************************************/
/* Utilities */
/*
Split path into segments using the first character of NCZM_SEP as the
delimiter. All work is done by nczm_split_delim, whose status is returned.
*/
int
nczm_split(const char* path, NClist* segments)
{
    const char sep = NCZM_SEP[0];

    return nczm_split_delim(path,sep,segments);
}
/*
Split path at each occurrence of delim and push a freshly malloc'd copy
of every segment onto segments.

One leading delimiter is skipped. A trailing delimiter is accepted.
Returns NC_EINVAL if path is NULL or empty or segments is NULL,
NC_EURL if an empty segment is encountered (e.g. two adjacent
delimiters), NC_ENOMEM if a segment cannot be allocated.
Segments pushed before an error remain on the list.
*/
int
nczm_split_delim(const char* path, char delim, NClist* segments)
{
    int stat = NC_NOERR;
    const char* cursor = NULL;
    char* piece = NULL;

    if(path == NULL || path[0] == '\0' || segments == NULL)
        {stat = NC_EINVAL; goto done;}

    /* Tolerate (and skip) a single leading delimiter */
    cursor = (path[0] == delim ? path+1 : path);

    while(*cursor != '\0') {
        ptrdiff_t n;
        const char* stop = strchr(cursor,delim);
        if(stop == NULL)
            stop = cursor + strlen(cursor); /* last segment runs to the nul */
        n = (stop - cursor);
        if(n == 0)
            {stat = NC_EURL; goto done;}
        piece = malloc(n+1);
        if(piece == NULL)
            {stat = NC_ENOMEM; goto done;}
        memcpy(piece,cursor,n);
        piece[n] = '\0';
        nclistpush(segments,piece);
        piece = NULL; /* the list holds the pointer now; keep done: from freeing it */
        /* Step over the delimiter, or stay on the nul to end the loop */
        cursor = (*stop != '\0' ? stop+1 : stop);
    }
done:
    nullfree(piece);
    return THROW(stat);
}
/*
Concatenate the segments into one path, with each segment preceded by '/'
unless the segment itself already starts with '/'.
An empty segment list produces "/".
On success the extracted string is stored in *pathp (if pathp is non-NULL).
Returns NC_EINVAL if segments is NULL, NC_ENOMEM if the work buffer
cannot be created.
*/
int
nczm_join(NClist* segments, char** pathp)
{
    int stat = NC_NOERR;
    int i;
    NCbytes* buf = NULL;

    if(segments == NULL)
        {stat = NC_EINVAL; goto done;}
    buf = ncbytesnew();
    if(buf == NULL)
        {stat = NC_ENOMEM; goto done;}

    /* No segments at all: the result is just the root */
    if(nclistlength(segments) == 0)
        ncbytescat(buf,"/");

    /* With zero segments this loop body never runs */
    for(i=0;i<nclistlength(segments);i++) {
        const char* seg = nclistget(segments,i);
        if(seg[0] != '/')
            ncbytescat(buf,"/");
        ncbytescat(buf,seg);
    }

    if(pathp) *pathp = ncbytesextract(buf);
done:
    ncbytesfree(buf);
    return THROW(stat);
}
int
nczm_concat(const char* prefix, const char* suffix, char** pathp)
{
NCbytes* buf = ncbytesnew();
if(prefix == NULL || strlen(prefix)==0) prefix = NCZM_SEP;
if(suffix == NULL) suffix = "";
ncbytescat(buf,prefix);
if(ncbytesget(buf,ncbyteslength(buf)-1) == NCZM_SEP[0])
ncbytessetlength(buf,ncbyteslength(buf)-1);
if(strlen(suffix) > 0 && suffix[0] != NCZM_SEP[0])
ncbytescat(buf,NCZM_SEP);
ncbytescat(buf,suffix);
if(pathp) *pathp = ncbytesextract(buf);
ncbytesfree(buf);
return NC_NOERR;
}
/* Concat multiple strings, but with no intervening separators */
int
nczm_appendn(char** resultp, int n, ...)
{
va_list args;
NCbytes* buf = ncbytesnew();
int i;
va_start(args, n);
for(i=0;i<n;i++) {
char* s = va_arg(args,char*);
if(s != NULL) ncbytescat(buf,s);
}
ncbytesnull(buf);
va_end(args);
if(resultp) {*resultp = ncbytesextract(buf);}
ncbytesfree(buf);
return NC_NOERR;
}
/* A segment is defined as a '/' plus characters following up
to the end or upto the next '/'
*/
int
nczm_divide_at(const char* key, int nsegs, char** prefixp, char** suffixp)
{
int stat = NC_NOERR;
char* prefix = NULL;
char* suffix = NULL;
size_t len, i;
ptrdiff_t delta;
const char* p;
int abssegs = (nsegs >= 0 ?nsegs: -nsegs);
int presegs = 0;
/* Special case */
if(key == NULL || strlen(key) == 0) goto done;
p = (key[0] == '/' ? key+1 : key);
/* Count number of segments */
for(len=0;;) {
const char* q = strchr(p,'/');
len++;
if(q == NULL) break;
p = q+1; /* start past leading '/' of next segment */
}
if(abssegs > len)
{stat = NC_EINVAL; goto done;}
/* find split point */
if(nsegs >= 0)
{presegs = abssegs;}
else
{presegs = (len - abssegs);}
/* skip past the first presegs segments */
for(p=key,i=0;i<presegs;i++) {
const char* q = strchr(p+1,'/');
if(q == NULL) {p = (p + strlen(p)); break;}
else p = q;
}
/* p should point at the presegs+1 start point */
delta = (p-key);
if(prefixp) {
prefix = malloc(delta+1);
memcpy(prefix,key,delta);
prefix[delta] = '\0';
*prefixp = prefix;
}
if(suffixp) {
suffix = strdup(p);
*suffixp = suffix;
}
done:
return stat;
}
/*
Release the url string held by map and reset the field to NULL so that
a repeated call (or a later free of the same field) cannot double-free.
The map object itself is not freed. A NULL map is ignored.
Always returns NC_NOERR.
*/
int
nczm_clear(NCZMAP* map)
{
    if(map != NULL) {
        nullfree(map->url);
        map->url = NULL; /* do not leave a dangling pointer behind */
    }
    return NC_NOERR;
}
/*
Return 1 if path looks absolute, 0 otherwise.
A path is treated as absolute when it starts with '/' or '\\', or when
NChasdriveletter reports a drive letter prefix.
NULL and empty paths are not absolute.
*/
int
nczm_isabsolutepath(const char* path)
{
    char first;

    if(path == NULL) return 0;

    first = path[0];
    if(first == '\\' || first == '/')
        return 1;
    if(first == '\0')
        return 0;
    /* Otherwise only a windows drive letter makes the path absolute */
    return (NChasdriveletter(path) ? 1 : 0);
}
/* Convert forward slash to backslash ( !localize) or vice-versa (localize)*/
int
nczm_localize(const char* path, char** localpathp, int localize)
{
int stat = NC_NOERR;
char* localpath = NULL;
char* p;
int forward = 1;
int offset = 0;
#ifdef _MSC_VER
forward = (localize?0:1);
#endif
/* If path comes from a url, then it may start with: /x:/...
where x is a drive letter. If so, then remove leading / */
if(path[0] == '/' && NChasdriveletter(path+1))
offset = 1;
if((localpath = strdup(path+offset))==NULL) return NC_ENOMEM;
for(p=localpath;*p;p++) {
if(forward && *p == '\\') *p = '/';
else if(!forward && *p == '/') *p = '\\';
}
if(localpathp) {*localpathp = localpath; localpath = NULL;}
nullfree(localpath);
return stat;
}
/*
Convert path to a canonical form:
1. processed by NCpathcvt (presumably yielding an absolute path
   including any drive letter -- confirm against ncpathmgr.h)
2. forward slashed, via nczm_localize called with !LOCALIZE

cpathp -- if non-NULL, receives the newly allocated canonical path,
          which the caller must free. For a NULL path it receives NULL,
          so the output is always defined when NC_NOERR is returned.

Returns NC_ENOMEM if NCpathcvt returns NULL, otherwise the status of
nczm_localize.
*/
int
nczm_canonicalpath(const char* path, char** cpathp)
{
    int ret = NC_NOERR;
    char* cpath = NULL;
    char* tmp = NULL;

    if(path != NULL) {
        /* Process path to make it be windows compatible */
        if((tmp = NCpathcvt(path))==NULL) {ret = NC_ENOMEM; goto done;}
        /* Fix slashes to be forward for now */
        if((ret = nczm_localize(tmp,&cpath,!LOCALIZE))) goto done;
    }
    /* For a NULL path cpath is still NULL, so the caller receives NULL */
    if(cpathp) {*cpathp = cpath; cpath = NULL;}
done:
    nullfree(tmp);
    nullfree(cpath);
    return THROW(ret);
}
/*
Extract the first segment of a path: the characters up to (not including)
the first '/', after skipping one leading '/' if present.

seg1p -- if non-NULL, receives a newly allocated copy of the segment,
         which the caller must free. For a NULL path it receives NULL,
         so the output is always defined when NC_NOERR is returned.

Returns NC_ENOMEM if the copy cannot be allocated.
*/
int
nczm_segment1(const char* path, char** seg1p)
{
    int ret = NC_NOERR;
    char* seg1 = NULL;

    if(path != NULL) {
        const char* p = path;
        const char* q = NULL;
        size_t delta;

        if(*p == '/') p++; /* skip any leading '/' */
        q = strchr(p,'/');
        if(q == NULL) q = p+strlen(p); /* point to stop character */
        delta = (size_t)(q-p);
        if((seg1 = malloc(delta+1))==NULL)
            {ret = NC_ENOMEM; goto done;}
        memcpy(seg1,p,delta);
        seg1[delta] = '\0';
    }
    /* For a NULL path seg1 is still NULL, so the caller receives NULL */
    if(seg1p) {*seg1p = seg1; seg1 = NULL;}
done:
    nullfree(seg1);
    return THROW(ret);
}
/*
Extract the basename from a path.
Basename is the last '/'-separated segment minus one extension, i.e.
everything in that segment before its last '.'; a segment with no '.'
is returned whole.

basep -- if non-NULL, receives a newly allocated copy of the basename,
         which the caller must free. For a NULL path it receives NULL,
         so the output is always defined when NC_NOERR is returned.

Returns NC_ENOMEM if the copy cannot be allocated.
*/
int
nczm_basename(const char* path, char** basep)
{
    int ret = NC_NOERR;
    char* base = NULL;

    if(path != NULL) {
        const char* p = NULL;
        const char* q = NULL;
        size_t delta;

        /* Locate the last segment */
        p = strrchr(path,'/');
        if(p == NULL) p = path; else p++;
        /* Locate the extension, if any, within that segment */
        q = strrchr(p,'.');
        if(q == NULL) q = p + strlen(p);
        delta = (size_t)(q-p);
        if((base = malloc(delta+1))==NULL)
            {ret = NC_ENOMEM; goto done;}
        memcpy(base,p,delta);
        base[delta] = '\0';
    }
    /* For a NULL path base is still NULL, so the caller receives NULL */
    if(basep) {*basep = base; base = NULL;}
done:
    nullfree(base);
    return THROW(ret);
}
/* Sort the strings held by list l in place by handing its contents
   and length to nczm_sortenvv */
void
nczm_sortlist(NClist* l)
{
    char** strings = (char**)nclistcontents(l);

    nczm_sortenvv(nclistlength(l),strings);
}
/* qsort comparator: each element is a char*; order by strcmp of the
   strings they point to */
static int
nczm_strptrcmp(const void* a, const void* b)
{
    const char* sa = *(char* const*)a;
    const char* sb = *(char* const*)b;
    return strcmp(sa,sb);
}

/*
Sort the first n entries of envv in place into ascending strcmp order.
Only the pointers are rearranged; the strings themselves are untouched.
Entries must be non-NULL. Nothing is done when n <= 1 or envv is NULL.
*/
void
nczm_sortenvv(int n, char** envv)
{
    if(n <= 1 || envv == NULL) return;
    qsort(envv,(size_t)n,sizeof(char*),nczm_strptrcmp);
}
/*
Free a vector of strings and the vector itself.

n    -- number of entries in envv; if negative, envv is taken to be
        NULL-terminated and the entries are counted up to the terminator
envv -- vector to free; NULL is ignored

NULL entries inside the counted range are skipped (free(NULL) is a no-op).
*/
void
NCZ_freeenvv(int n, char** envv)
{
    int i;

    if(envv == NULL) return;
    if(n < 0) {
        /* count number of strings; the index must advance on each step */
        for(n=0; envv[n] != NULL; n++) {}
    }
    for(i=0;i<n;i++)
        free(envv[i]);
    free(envv);
}