/*
 * Copyright 2018, University Corporation for Atmospheric Research
 * See netcdf/COPYRIGHT file for copying and redistribution conditions.
 */

#undef DEBUG

/* Not sure this has any effect */
#define _LARGEFILE_SOURCE 1
#define _LARGEFILE64_SOURCE 1

#include "zincludes.h"

#include <errno.h>
#include <zip.h>

#include "fbits.h"
#include "ncpathmgr.h"

#undef CACHESEARCH

#define VERIFY

/* Mnemonic */
#define FLAG_ISDIR 1
#define FLAG_CREATE 1
#define SKIPLAST 1
#define WHOLEPATH 0

#define NCZM_ZIP_V1 1
#define ZIP_PROPERTIES (NCZM_WRITEONCE)

/*
Map our simplified map model to a zip file.

Every dataset is assumed to be rooted at some directory in the zip file
tree, so its location is defined by a path to the zip file representing
the dataset.

For the object API, the mapping is as follows:
1. Every content-bearing object (e.g. .zgroup or .zarray) is mapped to a
   zip entry. This means that if a key points to a content-bearing object,
   then no other key may extend it (i.e. have that content-bearing key as
   a prefix).
2. The metadata-containing entries are assumed to contain UTF-8 character data.
3. The chunk-containing entries are assumed to contain raw unsigned 8-bit byte data.
4. The entries may or may not be compressed; this implementation writes them uncompressed.
*/
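
/*
Illustrative sketch (not part of this implementation; the guard macro
NCZM_ZIP_EXAMPLE is hypothetical and never defined, so this code is
compiled out). Using only plain libzip calls (declared in <zip.h>,
included above), it shows how a content-bearing key such as
"/meta/.zgroup" for a dataset rooted at "/tmp/test.zip" becomes the
uncompressed zip entry "test/meta/.zgroup".
*/
#ifdef NCZM_ZIP_EXAMPLE
static int
nczm_zip_example(void)
{
    int zerrno = ZIP_ER_OK;
    zip_t* archive = NULL;
    zip_source_t* src = NULL;
    zip_int64_t idx;
    static const char content[] = "{\"zarr_format\": 2}";

    /* Open (or create) the archive backing the dataset */
    if((archive = zip_open("/tmp/test.zip", ZIP_CREATE, &zerrno)) == NULL)
        return zerrno;
    /* The dataset name becomes the top-level directory entry */
    (void)zip_dir_add(archive, "test", ZIP_FL_ENC_UTF_8);
    /* Key "/meta/.zgroup" => entry "test/meta/.zgroup" */
    (void)zip_dir_add(archive, "test/meta", ZIP_FL_ENC_UTF_8);
    if((src = zip_source_buffer(archive, content, sizeof(content)-1, 0)) == NULL)
        {zip_discard(archive); return -1;}
    if((idx = zip_file_add(archive, "test/meta/.zgroup", src, ZIP_FL_ENC_UTF_8)) < 0)
        {zip_source_free(src); zip_discard(archive); return -1;}
    /* Store the object uncompressed */
    (void)zip_set_file_compression(archive, idx, ZIP_CM_STORE, 0);
    return zip_close(archive);
}
#endif /* NCZM_ZIP_EXAMPLE */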

/* Define the var name containing an object's content */
#define ZCONTENT "data"

/* Define the "subclass" of NCZMAP */
typedef struct ZZMAP {
    NCZMAP map;
    char* root;
    char* dataset; /* prefix for all keys in zip file */
    zip_t* archive;
    char** searchcache;
} ZZMAP;

typedef zip_int64_t ZINDEX;

/* Forward */
static NCZMAP_API zapi;
static int zipclose(NCZMAP* map, int delete);
static int zzcreategroup(ZZMAP*, const char* key, int nskip);
static int zzlookupobj(ZZMAP*, const char* key, ZINDEX* fd);
static int zzlen(ZZMAP* zzmap, ZINDEX zindex, size64_t* lenp);
static int zipmaperr(ZZMAP* zzmap);
static int ziperr(zip_error_t* zerror);
static int ziperrno(int zerror);
static void freesearchcache(char** cache);

static int zzinitialized = 0;

static void
zipinitialize(void)
{
    if(!zzinitialized) {
        ZTRACE(7,NULL);
        zzinitialized = 1;
        (void)ZUNTRACE(NC_NOERR);
    }
}

/* Define the Dataset level API */

/*
Create a dataset.
@param path path in the file tree of the root of the dataset; may be absolute or relative
@param mode the netcdf-c mode flags
@param flags extra flags
@param parameters extra parameters
@param mapp return the created map object here
*/
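
/*
Example (illustrative, not normative): a caller would typically pass a
path of the form "file:///tmp/test.zip#mode=nczarr,zip". Only the "file"
protocol is accepted, and the basename of the url path ("test") becomes
the dataset prefix for every entry inside the archive.
*/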

static int
zipcreate(const char *path, int mode, size64_t flags, void* parameters, NCZMAP** mapp)
{
    int stat = NC_NOERR;
    ZZMAP* zzmap = NULL;
    NCURI* url = NULL;
    zip_flags_t zipflags = 0;
    int zerrno = ZIP_ER_OK;
    ZINDEX zindex = -1;
    char* abspath = NULL;

    NC_UNUSED(parameters);
    ZTRACE(6,"path=%s mode=%d flag=%llu",path,mode,flags);

    if(!zzinitialized) zipinitialize();

    /* Fixup mode flags */
    mode = (NC_NETCDF4 | NC_WRITE | mode);

    /* path must be a url with file: protocol */
    ncuriparse(path,&url);
    if(url == NULL)
        {stat = NC_EURL; goto done;}
    if(strcasecmp(url->protocol,"file") != 0)
        {stat = NC_EURL; goto done;}

    /* Build the zz state */
    if((zzmap = calloc(1,sizeof(ZZMAP))) == NULL)
        {stat = NC_ENOMEM; goto done;}

    zzmap->map.format = NCZM_ZIP;
    zzmap->map.url = ncuribuild(url,NULL,NULL,NCURIALL);
    zzmap->map.flags = flags;
    /* create => NC_WRITE */
    zzmap->map.mode = mode;
    zzmap->map.api = &zapi;

    /* Since root is in canonical form, we need to convert to local form */
    if((zzmap->root = NCpathcvt(url->path))==NULL)
        {stat = NC_ENOMEM; goto done;}

    /* Make the root path be absolute */
    if((abspath = NCpathabsolute(zzmap->root)) == NULL)
        {stat = NC_EURL; goto done;}
    nullfree(zzmap->root);
    zzmap->root = abspath;
    abspath = NULL;

    /* Extract the dataset name */
    if((stat = nczm_basename(url->path,&zzmap->dataset))) goto done;

    /* Set zip openflags */
    zipflags |= ZIP_CREATE;
    if(fIsSet(mode,NC_NOCLOBBER))
        zipflags |= ZIP_EXCL;
    else
        zipflags |= ZIP_TRUNCATE;

#ifdef VERIFY
    zipflags |= ZIP_CHECKCONS;
#endif
    if((zzmap->archive = zip_open(zzmap->root,zipflags,&zerrno))==NULL)
        {stat = ziperrno(zerrno); goto done;}

    /* Tell it about the dataset as a dir */
    if((zindex = zip_dir_add(zzmap->archive, zzmap->dataset, ZIP_FL_ENC_UTF_8))<0)
        {stat = zipmaperr(zzmap); goto done;}

    /* Dataset superblock will be written by higher layer */

    if(mapp) {*mapp = (NCZMAP*)zzmap; zzmap = NULL;}

done:
    nullfree(abspath);
    ncurifree(url);
    if(zzmap) zipclose((NCZMAP*)zzmap,1);
    return ZUNTRACE(stat);
}

/*
Open an existing dataset.
@param path path in the file tree of the root of the dataset; may be absolute or relative
@param mode the netcdf-c mode flags
@param flags extra flags
@param parameters extra parameters
@param mapp return the opened map object here
*/

static int
zipopen(const char *path, int mode, size64_t flags, void* parameters, NCZMAP** mapp)
{
    int stat = NC_NOERR;
    ZZMAP* zzmap = NULL;
    NCURI* url = NULL;
    zip_flags_t zipflags = 0;
    int zerrno = ZIP_ER_OK;
    char* abspath = NULL;

    NC_UNUSED(parameters);
    ZTRACE(6,"path=%s mode=%d flags=%llu",path,mode,flags);

    if(!zzinitialized) zipinitialize();

    /* Fixup mode flags */
    mode = (NC_NETCDF4 | mode);

    /* path must be a url with file: protocol */
    ncuriparse(path,&url);
    if(url == NULL)
        {stat = NC_EURL; goto done;}
    if(strcasecmp(url->protocol,"file") != 0)
        {stat = NC_EURL; goto done;}

    /* Build the zz state */
    if((zzmap = calloc(1,sizeof(ZZMAP))) == NULL)
        {stat = NC_ENOMEM; goto done;}

    zzmap->map.format = NCZM_ZIP;
    zzmap->map.url = ncuribuild(url,NULL,NULL,NCURIALL);
    zzmap->map.flags = flags;
    zzmap->map.mode = mode;
    zzmap->map.api = (NCZMAP_API*)&zapi;

    /* Since root is in canonical form, we need to convert to local form */
    if((zzmap->root = NCpathcvt(url->path))==NULL)
        {stat = NC_ENOMEM; goto done;}

    /* Make the root path be absolute */
    if((abspath = NCpathabsolute(zzmap->root)) == NULL)
        {stat = NC_EURL; goto done;}
    nullfree(zzmap->root);
    zzmap->root = abspath;
    abspath = NULL;

    /* Set zip open flags */
    zipflags |= ZIP_CHECKCONS;
    if(!fIsSet(mode,NC_WRITE))
        zipflags |= ZIP_RDONLY;

#ifdef VERIFY
    zipflags |= ZIP_CHECKCONS;
#endif
    /* Open the file */
    if((zzmap->archive = zip_open(zzmap->root,zipflags,&zerrno))==NULL)
        {stat = ziperrno(zerrno); goto done;}

    /* Use entry 0 to obtain the dataset name */
    {
        const char* name;
        zip_int64_t num_entries;

        num_entries = zip_get_num_entries(zzmap->archive, (zip_flags_t)0);
        if(num_entries == 0) {stat = NC_EEMPTY; goto done;}
        /* get 0'th entry name */
        if((name = zip_get_name(zzmap->archive, 0, (zip_flags_t)0))==NULL)
            {stat = zipmaperr(zzmap); goto done;}
        if(name[0] == '\0' || name[0] == '/')
            {stat = NC_EBADID; goto done;}
        /* Extract the first segment as the dataset name */
        if((nczm_segment1(name,&zzmap->dataset))) goto done;
    }

    /* Dataset superblock will be read by higher layer */

    if(mapp) {*mapp = (NCZMAP*)zzmap; zzmap = NULL;}

done:
    nullfree(abspath);
    ncurifree(url);
    if(zzmap) zipclose((NCZMAP*)zzmap,0);

    return ZUNTRACE(stat);
}
static int
ziptruncate(const char* surl)
{
    int stat = NC_NOERR;
    NCURI* url = NULL;
    int errorp = 0;
    zip_t* zip = NULL;

    ZTRACE(6,"url=%s",surl);
    ncuriparse(surl,&url);
    if(url == NULL) {stat = NC_EURL; goto done;}
    zip = zip_open(url->path, ZIP_CREATE | ZIP_TRUNCATE, &errorp);
    if(zip == NULL) {stat = ziperrno(errorp); goto done;}
    zip_close(zip);
done:
    ncurifree(url);
    return stat;
}

/**************************************************/
/* Map API */

static int
zipclose(NCZMAP* map, int delete)
{
    int stat = NC_NOERR;
    int zerrno = 0;
    ZZMAP* zzmap = (ZZMAP*)map;

    if(zzmap == NULL) return NC_NOERR;

    ZTRACE(6,"map=%s delete=%d",map->url,delete);

    /* Close the zip */
    if(delete)
        zip_discard(zzmap->archive);
    else {
        if((zerrno=zip_close(zzmap->archive)))
            stat = ziperrno(zerrno);
    }
    if(delete)
        NCremove(zzmap->root);

    zzmap->archive = NULL;
    nczm_clear(map);
    nullfree(zzmap->root);
    nullfree(zzmap->dataset);
    zzmap->root = NULL;
    freesearchcache(zzmap->searchcache);
    free(zzmap);
    return ZUNTRACE(stat);
}

/**************************************************/
/* Object API */

static int
zipexists(NCZMAP* map, const char* key)
{
    int stat = NC_NOERR;
    ZZMAP* zzmap = (ZZMAP*)map;
    ZINDEX zindex = -1;

    ZTRACE(6,"map=%s key=%s",map->url,key);
    switch(stat=zzlookupobj(zzmap,key,&zindex)) {
    case NC_NOERR: break;
    case NC_ENOOBJECT: stat = NC_EEMPTY; break;
    case NC_EEMPTY: break;
    default: break;
    }
    return ZUNTRACE(stat);
}

static int
ziplen(NCZMAP* map, const char* key, size64_t* lenp)
{
    int stat = NC_NOERR;
    ZZMAP* zzmap = (ZZMAP*)map;
    size64_t len = 0;
    ZINDEX zindex = -1;

    ZTRACE(6,"map=%s key=%s",map->url,key);

    switch(stat = zzlookupobj(zzmap,key,&zindex)) {
    case NC_NOERR:
        if((stat = zzlen(zzmap,zindex,&len))) goto done;
        break;
    case NC_ENOOBJECT: stat = NC_EEMPTY; len = 0; break;
    case NC_EEMPTY: len = 0; break; /* |dir|==0 */
    default: goto done;
    }

    if(lenp) *lenp = len;

done:
    return ZUNTRACEX(stat,"len=%llu",(lenp?*lenp:777777777777));
}

static int
zipread(NCZMAP* map, const char* key, size64_t start, size64_t count, void* content)
{
    int stat = NC_NOERR;
    ZZMAP* zzmap = (ZZMAP*)map; /* cast to true type */
    zip_file_t* zfile = NULL;
    ZINDEX zindex = -1;
    zip_flags_t zipflags = 0;
    int zerrno;
    size64_t endpoint;
    char* buffer = NULL;
    char* truekey = NULL;
    zip_int64_t red = 0;

    ZTRACE(6,"map=%s key=%s start=%llu count=%llu",map->url,key,start,count);

    switch(stat = zzlookupobj(zzmap,key,&zindex)) {
    case NC_NOERR: break;
    case NC_ENOOBJECT: stat = NC_EEMPTY; /* fall thru */
    case NC_EEMPTY: /* it's a dir; fall thru */
    default: goto done;
    }

    /* Note, assume key[0] == '/' */
    if((stat = nczm_appendn(&truekey,2,zzmap->dataset,key)))
        goto done;

    zfile = zip_fopen(zzmap->archive, truekey, zipflags);
    if(zfile == NULL)
        {stat = (zipmaperr(zzmap)); goto done;}

    /* Ideally we would seek to the start point, but seeking fails if the
       entry is compressed, so we read from the beginning up to start+count
       and extract the part we need. */

    /* read data starting at zero */
    if(start == 0) { /* optimize by reading directly into content */
        if((red = zip_fread(zfile, content, (zip_uint64_t)count)) < 0)
            {stat = (zipmaperr(zzmap)); goto done;}
        if(red < count) {stat = NC_EINTERNAL; goto done;}
    } else {
        endpoint = start + count;
        if((buffer = malloc(endpoint))==NULL) /* consider caching this */
            {stat = NC_ENOMEM; goto done;}
        if((red = zip_fread(zfile, buffer, (zip_uint64_t)endpoint)) < 0)
            {stat = (zipmaperr(zzmap)); goto done;}
        if(red < endpoint) {stat = NC_EINTERNAL; goto done;}
        /* Extract what we need */
        memcpy(content,buffer+start,count);
    }

done:
    nullfree(truekey);
    nullfree(buffer);
    if(zfile != NULL && (zerrno=zip_fclose(zfile)) != 0)
        {stat = ziperrno(zerrno);}
    return ZUNTRACE(stat);
}

static int
zipwrite(NCZMAP* map, const char* key, size64_t count, const void* content)
{
    int stat = NC_NOERR;
    ZZMAP* zzmap = (ZZMAP*)map; /* cast to true type */
    char* truekey = NULL;
    zip_flags_t zflags = 0;
    zip_source_t* zs = NULL;
    ZINDEX zindex = -1;
    zip_int32_t compression = 0;
    zip_error_t zerror;
    void* localbuffer = NULL;

    ZTRACE(6,"map=%s key=%s count=%llu",map->url,key,count);

    zip_error_init(&zerror);

    /* Create directories */
    if((stat = zzcreategroup(zzmap,key,SKIPLAST))) goto done;

    switch(stat = zzlookupobj(zzmap,key,&zindex)) {
    case NC_NOERR:
        stat = NC_EOBJECT; //goto done; /* Zip files are write once */
        zflags |= ZIP_FL_OVERWRITE;
        break;
    case NC_ENOOBJECT: stat = NC_NOERR; break;
    case NC_EEMPTY: /* it's a dir; fall thru */
    default: goto done;
    }

    zflags |= ZIP_FL_ENC_UTF_8;
    compression = ZIP_CM_STORE;

    /* prepend the dataset to get truekey */
    /* Note, assume key[0] == '/' */
    if((stat = nczm_appendn(&truekey,2,zzmap->dataset,key)))
        goto done;

    if(count > 0) {
        /* The buffer being written must remain valid until zip_close,
           so make a local copy and let libzip free it (freep==1 below)
           once it is no longer needed. */
        /* Duplicate the buffer */
        if((localbuffer = malloc((size_t)count))==NULL)
            {stat = NC_ENOMEM; goto done;}
        memcpy(localbuffer,content,count);
    }
    if((zs = zip_source_buffer(zzmap->archive, localbuffer, (zip_uint64_t)count, 1)) == NULL)
        {stat = zipmaperr(zzmap); goto done;}

    if((zindex = zip_file_add(zzmap->archive, truekey, zs, zflags))<0)
        {stat = zipmaperr(zzmap); goto done;}

    zs = NULL; localbuffer = NULL;

    if(zip_set_file_compression(zzmap->archive, zindex, compression, 0) < 0)
        {stat = zipmaperr(zzmap); goto done;}

    freesearchcache(zzmap->searchcache); zzmap->searchcache = NULL;

done:
    if(zs) zip_source_free(zs);
    nullfree(localbuffer);
    zip_error_fini(&zerror);
    nullfree(truekey);
    return ZUNTRACE(stat);
}

/*
Return a list of the names of objects immediately under a specified
prefix key. In theory the returned list should be sorted in lexical
order, but it is possible that it is not.
Note that for zip it is not possible to list only the keys one segment
below the prefix, so this search must examine all keys and process them
one by one.
@return NC_NOERR if success, even if no keys are returned.
@return NC_EXXX to report a true error.
*/
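/*
Example (illustrative): given the archive entries
    test/g1/.zgroup
    test/g1/v/.zarray
    test/g1/v/0
a search with prefix "/g1" over dataset "test" yields {".zgroup", "v"}:
the deeper key "v/0" contributes only its first segment "v", and
duplicates are removed below.
*/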
int
zipsearch(NCZMAP* map, const char* prefix0, NClist* matches)
{
    int stat = NC_NOERR;
    ZZMAP* zzmap = (ZZMAP*)map;
    char* trueprefix = NULL;
    size_t truelen;
    zip_int64_t num_entries, i;
    char** cache = NULL;
    size_t prefixlen;
    NClist* tmp = NULL;

    ZTRACE(6,"map=%s prefix0=%s",map->url,prefix0);

    /* prefix constraints:
       1. prefix is "/"
       2. or prefix has a leading '/' and no trailing '/'
    */

    /* Fix up the prefix, including adding the dataset name to the front */
    if(prefix0 == NULL || strlen(prefix0)==0)
        prefix0 = "/";
    /* make sure that prefix0 has a leading '/' */
    if(prefix0[0] != '/')
        {stat = NC_EINVAL; goto done;}
    prefixlen = strlen(prefix0);
    truelen = prefixlen+strlen(zzmap->dataset)+1; /* possible trailing '/' */
    if((trueprefix = (char*)malloc(truelen+1+1))==NULL) /* nul term */
        {stat = NC_ENOMEM; goto done;}
    /* Build the true prefix */
    trueprefix[0] = '\0';
    strlcat(trueprefix,zzmap->dataset,truelen+1);
    strlcat(trueprefix,prefix0,truelen+1); /* recall prefix starts with '/' */
    /* If the prefix did not end in '/', then add it */
    if(prefixlen > 1 && prefix0[prefixlen-1] != '/')
        strlcat(trueprefix,"/",truelen+1);
    truelen = strlen(trueprefix);

    /* Get number of entries */
    num_entries = zip_get_num_entries(zzmap->archive, (zip_flags_t)0);
#ifdef CACHESEARCH
    if(num_entries > 0 && zzmap->searchcache == NULL) {
        /* Release the current cache */
        freesearchcache(zzmap->searchcache);
        zzmap->searchcache = NULL;
        /* Re-build the searchcache */
        if((cache = calloc(sizeof(char*),num_entries+1))==NULL)
            {stat = NC_ENOMEM; goto done;}
        for(i=0;i < num_entries; i++) {
            const char *name = NULL;
            /* get ith entry */
            name = zip_get_name(zzmap->archive, i, (zip_flags_t)0);
            /* Add to cache */
            if((cache[i] = strdup(name))==NULL)
                {stat = NC_ENOMEM; goto done;}
        }
        cache[num_entries] = NULL;
        zzmap->searchcache = cache; cache = NULL;
    }
#endif
#ifdef CACHESEARCH
    if(zzmap->searchcache != NULL)
#endif
    {
        const char *key = NULL;
        size_t keylen = 0;
        char* match = NULL;
        const char* p;

        tmp = nclistnew();
        /* Walk the entries looking for names with the prefix plus exactly one more segment */
        for(i=0;i < num_entries; i++) {
            /* get ith entry */
#ifdef CACHESEARCH
            key = zzmap->searchcache[i];
#else
            key = zip_get_name(zzmap->archive, i, (zip_flags_t)0);
#endif
            keylen = strlen(key);
            /* Does this name begin with trueprefix? */
            if(keylen > 0
               && (keylen <= truelen || strncmp(key,trueprefix,truelen) != 0))
                continue; /* no match */
            /* skip trueprefix and extract the first segment */
            p = (key+truelen);
            if(*p == '\0') continue; /* key is all there is, so ignore it */
            /* get seg 1 */
            if((nczm_segment1(p,&match))) goto done;
            nclistpush(tmp,match); match = NULL;
        }
        /* Now remove duplicates */
        for(i=0;i<nclistlength(tmp);i++) {
            int j;
            int duplicate = 0;
            const char* is = nclistget(tmp,i);
            for(j=0;j<nclistlength(matches);j++) {
                const char* js = nclistget(matches,j);
                if(strcmp(js,is)==0) {duplicate = 1; break;} /* duplicate */
            }
            if(!duplicate)
                nclistpush(matches,strdup(is));
        }
    }

done:
    nclistfreeall(tmp);
    if(cache != NULL) freesearchcache(cache);
    nullfree(trueprefix);
    return ZUNTRACEX(stat,"|matches|=%d",(int)nclistlength(matches));
}

/**************************************************/
/* Utilities */

/* Guarantee existence of a group */
static int
zzcreategroup(ZZMAP* zzmap, const char* key, int nskip)
{
    int stat = NC_NOERR;
    int i, len;
    char* fullpath = NULL;
    NCbytes* path = ncbytesnew();
    NClist* segments = nclistnew();
    ZINDEX zindex;
    zip_flags_t zipflags = ZIP_FL_ENC_UTF_8;

    ZTRACE(7,"map=%s key=%s nskip=%d",zzmap->map.url,key,nskip);
    if((stat=nczm_split(key,segments)))
        goto done;
    len = nclistlength(segments);
    len -= nskip; /* leave off the last nskip segments */
    /* Start with the dataset */
    ncbytescat(path,zzmap->dataset);
    for(i=0;i<len;i++) {
        const char* seg = nclistget(segments,i);
        ncbytescat(path,"/");
        ncbytescat(path,seg);
        /* open and/or create the directory */
        if((zindex = zip_dir_add(zzmap->archive, ncbytescontents(path), zipflags))<0) {
            switch(stat = zipmaperr(zzmap)) {
            case NC_EOBJECT: stat = NC_NOERR; break; /* already exists: ok */
            default:
                goto done;
            }
        }
    }
done:
    nullfree(fullpath);
    ncbytesfree(path);
    nclistfreeall(segments);
    return ZUNTRACE(stat);
}

/* Lookup a key.
@return NC_NOERR if found and it is a content-bearing object
@return NC_ENOOBJECT if not found
@return NC_EEMPTY if it is a dir
*/
static int
zzlookupobj(ZZMAP* zzmap, const char* key, ZINDEX* zindex)
{
    int stat = NC_NOERR;
    char* zipfile = NULL;
    char* zipdir = NULL;

    ZTRACE(7,"map=%s key=%s",zzmap->map.url,key);

    if(key == NULL)
        {stat = NC_EINVAL; goto done;}

    /* Note, assume key[0] == '/' */
    if((stat = nczm_appendn(&zipfile,2,zzmap->dataset,key)))
        goto done;

    /* See if it exists as a file */
    if((*zindex = zip_name_locate(zzmap->archive, zipfile, 0))<0) {
        /* do a second check to see if zippath exists as a dir */
        if((stat = nczm_appendn(&zipdir,2,zipfile,"/")))
            goto done;
        if((*zindex = zip_name_locate(zzmap->archive, zipdir, 0))<0)
            {stat = zipmaperr(zzmap); goto done;}
        stat = NC_EEMPTY; /* signal a directory */
    }

done:
    nullfree(zipfile);
    nullfree(zipdir);
    return ZUNTRACE(stat);
}

/* Get length given the index */
static int
zzlen(ZZMAP* zzmap, ZINDEX zindex, size64_t* lenp)
{
    int stat = NC_NOERR;
    size64_t len = 0;
    zip_stat_t statbuf;
    zip_flags_t zipflags = 0;

    assert(zindex >= 0);

    ZTRACE(6,"zzmap=%s index=%llu",zzmap,zindex);

    zip_stat_init(&statbuf);
    if(zip_stat_index(zzmap->archive,zindex,zipflags,&statbuf) < 0)
        {stat = (zipmaperr(zzmap)); goto done;}
    assert(statbuf.valid & ZIP_STAT_SIZE);
    len = statbuf.size; /* Always return uncompressed size */

    if(lenp) *lenp = len;

done:
    return ZUNTRACEX(stat,"len=%llu",(lenp?*lenp:777777777777));
}

static void
freesearchcache(char** cache)
{
    char** p;
    if(cache == NULL) return;
    for(p=cache;*p;p++) {
        free(*p);
    }
    free(cache);
}

/**************************************************/
/* External API objects */

NCZMAP_DS_API zmap_zip = {
    NCZM_ZIP_V1,
    ZIP_PROPERTIES,
    zipcreate,
    zipopen,
    ziptruncate,
};

static NCZMAP_API zapi = {
    NCZM_ZIP_V1,
    zipclose,
    zipexists,
    ziplen,
    zipread,
    zipwrite,
    zipsearch,
};

static int
zipmaperr(ZZMAP* zzmap)
{
    zip_error_t* zerr = (zip_error_t*)zip_get_error(zzmap->archive);
    return ziperr(zerr);
}

static int
ziperr(zip_error_t* zerror)
{
    int zerrno = zip_error_code_zip(zerror);
    return ziperrno(zerrno);
}

static int
ziperrno(int zerror)
{
    int stat = NC_NOERR;
    switch (zerror) {
    case ZIP_ER_OK: stat = NC_NOERR; break;
    case ZIP_ER_EXISTS: stat = NC_EOBJECT; break;
    case ZIP_ER_MEMORY: stat = NC_ENOMEM; break;
    case ZIP_ER_SEEK:
    case ZIP_ER_READ:
    case ZIP_ER_WRITE:
    case ZIP_ER_TMPOPEN:
    case ZIP_ER_CRC: stat = NC_EIO; break;
    case ZIP_ER_ZIPCLOSED: stat = NC_EBADID; break;
    case ZIP_ER_NOENT: stat = NC_ENOOBJECT; break;
    case ZIP_ER_OPEN: stat = NC_EACCESS; break;
    case ZIP_ER_INVAL: stat = NC_EINVAL; break;
    case ZIP_ER_INTERNAL: stat = NC_EINTERNAL; break;
    case ZIP_ER_REMOVE: stat = NC_ECANTREMOVE; break;
    case ZIP_ER_DELETED: stat = NC_ENOOBJECT; break;
    case ZIP_ER_RDONLY: stat = NC_EPERM; break;
    case ZIP_ER_CHANGED: stat = NC_EOBJECT; break;
    default: stat = NC_ENCZARR; break;
    }
    return stat;
}