Add support for the XArray Zarr _ARRAY_DIMENSIONS attribute

The XArray implementation that uses Zarr for storage
provides a mechanism to simulate named dimensions.
It does this by adding a per-variable attribute called
_ARRAY_DIMENSIONS. This attribute contains a list of names
to be matched against the shape values of the variable.
In effect, a named dimension is created with the name
_ARRAY_DIMENSIONS(i) and length shape(i) for each i
in the range 0..rank(variable)-1.
Both read and write support is provided.

This XArray support is only invoked if the mode value
"xarray" is specified, as in this example URL.
````
https://s3.us-west-1.amazonaws.com/bucket/dataset#mode=nczarr,xarray,s3
````
Note that the "xarray" mode flag also implies mode flag "zarr", so the above
is equivalent to this URL.
````
https://s3.us-west-1.amazonaws.com/bucket/dataset#mode=nczarr,zarr,xarray,s3
````

The primary change to implement this was to unify the handling
of dimension references in libnczarr/zsync.

A test for this and other pure-zarr features was added as
nczarr_test/run_purezarr.sh

Other changes:
* Make sure distcheck leaves no files around.
* Change the special attribute flag DIMSCALEFLAG to HIDDENATTRFLAG
  to support the xarray attribute.
* Annotate the zmap implementations with feature flags such as
  WRITEONCE (for zip files).
This commit is contained in:
Dennis Heimbigner 2021-02-24 13:46:11 -07:00
parent d136d46d22
commit 2afbdbd18f
30 changed files with 1404 additions and 1511 deletions

View File

@ -4,7 +4,7 @@
name: Run netCDF Tests
on: [pull_request]
on: [pull_request,push]
jobs:

View File

@ -7,6 +7,7 @@ This file contains a high-level description of this package's evolution. Release
## 4.8.0 - TBD
* [Enhancement] Add support for reading/writing pure Zarr storage format that supports the XArray _ARRAY_DIMENSIONS attribute. See [Github #1946](https://github.com/Unidata/netcdf-c/pull/1946) for more information.
* [Bug Fixes] The nccopy program was treating -d0 as turning deflation on rather than interpreting it as "turn off deflation". See [Github #1944](https://github.com/Unidata/netcdf-c/pull/1944) for more information.
* [Enhancement] Add support for storing NCZarr data in zip files. See [Github #1942](https://github.com/Unidata/netcdf-c/pull/1942) for more information.
* [Bug Fixes] Make fillmismatch the default for DAP2 and DAP4; too many servers ignore this requirement.

View File

@ -97,9 +97,8 @@ typedef enum {NCNAT, NCVAR, NCDIM, NCATT, NCTYP, NCFLD, NCGRP, NCFIL} NC_SORT;
#define NC4_DATA_SET(nc,data) ((nc)->dispatchdata = (void *)(data))
/* Reserved attribute flags: must be powers of 2. */
/** Hidden dimscale-related, per-variable attributes; immutable and
* unreadable thru API. */
#define DIMSCALEFLAG 1
/** Hidden attributes; immutable and unreadable thru API. */
#define HIDDENATTRFLAG 1
/** Readonly global attributes; readable, but immutable thru the
* API. */
@ -461,5 +460,6 @@ extern const NC_reservedatt* NC_findreserved(const char* name);
#define NC_ATT_FORMAT "_Format"
#define NC_ATT_DIMID_NAME "_Netcdf4Dimid"
#define NC_ATT_NC3_STRICT_NAME "_nc3_strict"
#define NC_XARRAY_DIMS "_ARRAY_DIMENSIONS"
#endif /* _NC4INTERNAL_ */

View File

@ -122,6 +122,7 @@ static const struct MACRODEF {
{"dap4","mode","dap4"},
{"s3","mode","nczarr,s3"},
{"bytes","mode","bytes"},
{"xarray","mode","nczarr,zarr,xarray"},
{NULL,NULL,NULL}
};

View File

@ -459,8 +459,8 @@ nc4_put_att(NC_GRP_INFO_T* grp, int varid, const char *name, nc_type file_type,
if (nc->ext_ncid == ncid && varid == NC_GLOBAL && grp->parent == NULL
&& (ra->flags & READONLYFLAG))
return NC_ENAMEINUSE;
/* case 2: grp=NA, varid!=NC_GLOBAL, flags & DIMSCALEFLAG */
if (varid != NC_GLOBAL && (ra->flags & DIMSCALEFLAG))
/* case 2: grp=NA, varid!=NC_GLOBAL, flags & HIDDENATTRFLAG */
if (varid != NC_GLOBAL && (ra->flags & HIDDENATTRFLAG))
return NC_ENAMEINUSE;
}

View File

@ -52,7 +52,7 @@ ncz_create_dataset(NC_FILE_INFO_T* file, NC_GRP_INFO_T* root, const char** contr
zinfo->created = 1;
zinfo->common.file = file;
zinfo->native_endianness = (NCZ_isLittleEndian() ? NC_ENDIAN_LITTLE : NC_ENDIAN_BIG);
if((zinfo->controls=NCZ_clonestringvec(0,controls)) == NULL)
if((zinfo->envv_controls=NCZ_clonestringvec(0,controls)) == NULL)
{stat = NC_ENOMEM; goto done;}
/* fill in some of the zinfo and zroot fields */
@ -73,7 +73,7 @@ ncz_create_dataset(NC_FILE_INFO_T* file, NC_GRP_INFO_T* root, const char** contr
}
/* initialize map handle*/
if((stat = nczmap_create(zinfo->features.mapimpl,nc->path,nc->mode,zinfo->features.flags,NULL,&zinfo->map)))
if((stat = nczmap_create(zinfo->controls.mapimpl,nc->path,nc->mode,zinfo->controls.flags,NULL,&zinfo->map)))
goto done;
done:
@ -122,7 +122,7 @@ ncz_open_dataset(NC_FILE_INFO_T* file, const char** controls)
zinfo->created = 0;
zinfo->common.file = file;
zinfo->native_endianness = (NCZ_isLittleEndian() ? NC_ENDIAN_LITTLE : NC_ENDIAN_BIG);
if((zinfo->controls = NCZ_clonestringvec(0,controls))==NULL) /*0=>envv style*/
if((zinfo->envv_controls = NCZ_clonestringvec(0,controls))==NULL) /*0=>envv style*/
{stat = NC_ENOMEM; goto done;}
/* Add struct to hold NCZ-specific group info. */
@ -134,10 +134,10 @@ ncz_open_dataset(NC_FILE_INFO_T* file, const char** controls)
if((stat = applycontrols(zinfo))) goto done;
/* initialize map handle*/
if((stat = nczmap_open(zinfo->features.mapimpl,nc->path,mode,zinfo->features.flags,NULL,&zinfo->map)))
if((stat = nczmap_open(zinfo->controls.mapimpl,nc->path,mode,zinfo->controls.flags,NULL,&zinfo->map)))
goto done;
if(!(zinfo->features.flags & FLAG_PUREZARR)
if(!(zinfo->controls.flags & FLAG_PUREZARR)
&& (stat = NCZ_downloadjson(zinfo->map, NCZMETAROOT, &json)) == NC_NOERR) {
/* Extract the information from it */
for(i=0;i<nclistlength(json->contents);i+=2) {
@ -153,7 +153,7 @@ ncz_open_dataset(NC_FILE_INFO_T* file, const char** controls)
&zinfo->zarr.nczarr_version.release);
}
}
} else { /* zinfo->features.purezarr || no object */
} else { /* zinfo->controls.purezarr || no object */
zinfo->zarr.zarr_version = ZARRVERSION;
sscanf(NCZARRVERSION,"%lu.%lu.%lu",
&zinfo->zarr.nczarr_version.major,
@ -335,9 +335,9 @@ ncz_unload_jatts(NCZ_FILE_INFO_T* zinfo, NC_OBJ* container, NCjson* jattrs, NCjs
if((stat=NCZ_uploadjson(map,tkey,jattrs)))
goto done;
if(!(zinfo->features.flags & FLAG_PUREZARR)) {
if(!(zinfo->controls.flags & FLAG_PUREZARR)) {
/* Construct the path to the .nczattr object */
if((stat = nczm_concat(fullpath,NCZATTR,&tkey)))
if((stat = nczm_concat(fullpath,NCZATTRS,&tkey)))
goto done;
/* Upload the .nczattr object */
if((stat=NCZ_uploadjson(map,tkey,jtypes)))
@ -356,10 +356,10 @@ done:
}
static const char*
controllookup(const char** controls, const char* key)
controllookup(const char** envv_controls, const char* key)
{
const char** p;
for(p=controls;*p;p++) {
for(p=envv_controls;*p;p+=2) {
if(strcasecmp(key,*p)==0) {
return p[1];
}
@ -375,26 +375,27 @@ applycontrols(NCZ_FILE_INFO_T* zinfo)
const char* value = NULL;
NClist* modelist = nclistnew();
if((value = controllookup((const char**)zinfo->controls,"mode")) != NULL) {
if((value = controllookup((const char**)zinfo->envv_controls,"mode")) != NULL) {
if((stat = NCZ_comma_parse(value,modelist))) goto done;
}
/* Process the modelist first */
zinfo->features.mapimpl = NCZM_DEFAULT;
zinfo->controls.mapimpl = NCZM_DEFAULT;
for(i=0;i<nclistlength(modelist);i++) {
const char* p = nclistget(modelist,i);
if(strcasecmp(p,PUREZARR)==0) zinfo->features.flags |= FLAG_PUREZARR;
else if(strcasecmp(p,"zip")==0) zinfo->features.mapimpl = NCZM_ZIP;
else if(strcasecmp(p,"file")==0) zinfo->features.mapimpl = NCZM_FILE;
else if(strcasecmp(p,"s3")==0) zinfo->features.mapimpl = NCZM_S3;
if(strcasecmp(p,PUREZARRCONTROL)==0) zinfo->controls.flags |= FLAG_PUREZARR;
else if(strcasecmp(p,XARRAYCONTROL)==0) zinfo->controls.flags |= (FLAG_XARRAYDIMS|FLAG_PUREZARR); /*xarray=>zarr*/
else if(strcasecmp(p,"zip")==0) zinfo->controls.mapimpl = NCZM_ZIP;
else if(strcasecmp(p,"file")==0) zinfo->controls.mapimpl = NCZM_FILE;
else if(strcasecmp(p,"s3")==0) zinfo->controls.mapimpl = NCZM_S3;
}
/* Process other controls */
if((value = controllookup((const char**)zinfo->controls,"log")) != NULL) {
zinfo->features.flags |= FLAG_LOGGING;
if((value = controllookup((const char**)zinfo->envv_controls,"log")) != NULL) {
zinfo->controls.flags |= FLAG_LOGGING;
ncsetlogging(1);
}
if((value = controllookup((const char**)zinfo->controls,"show")) != NULL) {
if((value = controllookup((const char**)zinfo->envv_controls,"show")) != NULL) {
if(strcasecmp(value,"fetch")==0)
zinfo->features.flags |= FLAG_SHOWFETCH;
zinfo->controls.flags |= FLAG_SHOWFETCH;
}
done:
nclistfreeall(modelist);

View File

@ -13,6 +13,7 @@
#undef ADEBUG
/**
* @internal Get the attribute list for either a varid or NC_GLOBAL
*
@ -443,8 +444,8 @@ ncz_put_att(NC_GRP_INFO_T* grp, int varid, const char *name, nc_type file_type,
if (nc->ext_ncid == ncid && varid == NC_GLOBAL && grp->parent == NULL
&& (ra->flags & READONLYFLAG))
return NC_ENAMEINUSE;
/* case 2: grp=NA, varid!=NC_GLOBAL, flags & DIMSCALEFLAG */
if (varid != NC_GLOBAL && (ra->flags & DIMSCALEFLAG))
/* case 2: grp=NA, varid!=NC_GLOBAL, flags & HIDDENATTRFLAG */
if (varid != NC_GLOBAL && (ra->flags & HIDDENATTRFLAG))
return NC_ENAMEINUSE;
}

View File

@ -48,7 +48,7 @@ ncz_close_file(NC_FILE_INFO_T* file, int abort)
if((stat = nczmap_close(zinfo->map,(abort && zinfo->created)?1:0)))
goto done;
NCZ_freestringvec(0,zinfo->controls);
NCZ_freestringvec(0,zinfo->envv_controls);
NC_authfree(zinfo->auth);
nullfree(zinfo);
@ -171,6 +171,8 @@ zclose_vars(NC_GRP_INFO_T* grp)
/* Reclaim the type */
(void)zclose_type(var->type_info);
NCZ_free_chunk_cache(zvar->cache);
/* reclaim xarray */
nclistfreeall(zvar->xarray);
nullfree(zvar);
var->format_var_info = NULL; /* avoid memory errors */
}

View File

@ -241,7 +241,7 @@ NCZ_stringconvert1(nc_type srctype, char* src, char** strp)
nc_type dsttype = NC_NAT;
char s[1024];
assert(srctype >= NC_NAT && srctype != NC_CHAR && srctype < NC_STRING);
assert(srctype >= NC_NAT && srctype != NC_CHAR && srctype < NC_STRING);
/* Convert to a restricted set of values */
switch (srctype) {
case NC_BYTE: {

View File

@ -44,14 +44,22 @@
#define NCZMETAROOT "/.nczarr"
#define NCZGROUP ".nczgroup"
#define NCZVAR ".nczvar"
#define NCZATTR ".nczattr"
#define NCZARRAY ".nczarray"
#define NCZATTRS ".nczattrs"
/* Deprecated */
#define NCZVARDEP ".nczvar"
#define NCZATTRDEP ".nczattr"
#define ZGROUP ".zgroup"
#define ZATTRS ".zattrs"
#define ZARRAY ".zarray"
#define PUREZARR "zarr"
#define PUREZARRCONTROL "zarr"
#define XARRAYCONTROL "xarray"
/* Mnemonics */
#define ZCLOSE 1 /* this is closeorabort as opposed to enddef */
/* Mnemonics */
#define ZCLOSE 1 /* this is closeorabort as opposed to enddef */
@ -59,6 +67,7 @@
/**************************************************/
/* Forward */
struct NClist;
struct NCjson;
struct NCauth;
struct NCZMAP;
@ -87,14 +96,15 @@ typedef struct NCZ_FILE_INFO {
} zarr;
int created; /* 1=> created 0=>open */
int native_endianness; /* NC_ENDIAN_LITTLE | NC_ENDIAN_BIG */
char** controls; /* Envv format */
struct Features {
char** envv_controls; /* Envv format */
struct Controls {
size64_t flags;
# define FLAG_PUREZARR 1
# define FLAG_SHOWFETCH 2
# define FLAG_LOGGING 4
# define FLAG_PUREZARR 1
# define FLAG_SHOWFETCH 2
# define FLAG_LOGGING 4
# define FLAG_XARRAYDIMS 8
NCZM_IMPL mapimpl;
} features;
} controls;
} NCZ_FILE_INFO_T;
/* This is a struct to handle the dim metadata. */
@ -132,6 +142,7 @@ typedef struct NCZ_VAR_INFO {
int order; /* 1=>column major, 0=>row major (default); not currently enforced */
size_t scalar;
struct NCZChunkCache* cache;
struct NClist* xarray; /* names from _ARRAY_DIMENSIONS */
} NCZ_VAR_INFO_T;
/* Struct to hold ZARR-specific info for a field. */

View File

@ -23,16 +23,16 @@ extern NCZMAP_DS_API zmap_s3sdk;
/**************************************************/
NCZM_PROPERTIES
nczmap_properties(NCZM_IMPL impl)
NCZM_FEATURES
nczmap_features(NCZM_IMPL impl)
{
switch (impl) {
case NCZM_FILE: return zmap_file.properties;
case NCZM_FILE: return zmap_file.features;
#ifdef ENABLE_NCZARR_ZIP
case NCZM_ZIP: return zmap_zip.properties;
case NCZM_ZIP: return zmap_zip.features;
#endif
#ifdef ENABLE_S3_SDK
case NCZM_S3: return zmap_s3sdk.properties;
case NCZM_S3: return zmap_s3sdk.features;
#endif
default: break;
}
@ -436,6 +436,28 @@ done:
return THROW(ret);
}
/*
Extract the last segment from path.
*/
int
nczm_lastsegment(const char* path, char** lastp)
{
int ret = NC_NOERR;
const char* last = NULL;
if(path == NULL)
{if(lastp) *lastp = NULL; goto done;}
last = strrchr(path,'/');
if(last == NULL) last = path; else last++;
if(lastp) *lastp = strdup(last);
done:
return THROW(ret);
}
/*
Extract the basename from a path.
Basename is last segment minus one extension.
@ -444,30 +466,27 @@ Basename is last segment minus one extension.
int
nczm_basename(const char* path, char** basep)
{
int ret = NC_NOERR;
int stat = NC_NOERR;
char* base = NULL;
char* last = NULL;
const char* p = NULL;
const char* q = NULL;
ptrdiff_t delta;
if(path == NULL)
{base = NULL; goto done;}
if((stat=nczm_lastsegment(path,&last))) goto done;
p = strrchr(path,'/');
if(p == NULL) p = path; else p++;
q = strrchr(p,'.');
if(q == NULL) q = p + strlen(p);
delta = (q-p);
if(last == NULL) goto done;
p = strrchr(last,'.');
if(p == NULL) p = last+strlen(last);
delta = (p - last);
if((base = (char*)malloc(delta+1))==NULL)
{ret = NC_ENOMEM; goto done;}
memcpy(base,p,delta);
{stat = NC_ENOMEM; goto done;}
memcpy(base,last,delta);
base[delta] = '\0';
if(basep) {*basep = base; base = NULL;}
done:
nullfree(last);
nullfree(base);
return THROW(ret);
return THROW(stat);
}
/* bubble sort a list of strings */

View File

@ -158,7 +158,7 @@ NCZM_S3=3, /* Amazon S3 implementation */
#define NCZM_DEFAULT NCZM_ZIP
/* Define the per-implementation limitations flags */
typedef size64_t NCZM_PROPERTIES;
typedef size64_t NCZM_FEATURES;
/* powers of 2 */
#define NCZM_UNIMPLEMENTED 1 /* Unknown/ unimplemented */
#define NCZM_WRITEONCE 2 /* Objects can only be written once */
@ -218,7 +218,7 @@ struct NCZMAP_API {
/* Define the Dataset level API */
typedef struct NCZMAP_DS_API {
int version;
NCZM_PROPERTIES properties;
NCZM_FEATURES features;
int (*create)(const char *path, int mode, size64_t constraints, void* parameters, NCZMAP** mapp);
int (*open)(const char *path, int mode, size64_t constraints, void* parameters, NCZMAP** mapp);
} NCZMAP_DS_API;
@ -234,7 +234,7 @@ Get limitations of a particular implementation.
@return NC_NOERR if the operation succeeded
@return NC_EXXX if the operation failed for one of several possible reasons
*/
EXTERNL NCZM_PROPERTIES nczmap_properties(NCZM_IMPL);
EXTERNL NCZM_FEATURES nczmap_features(NCZM_IMPL);
/* Object API Wrappers; note that there are no group operations
because group keys do not map to directories.
@ -350,6 +350,7 @@ EXTERNL int nczm_localize(const char* path, char** newpathp, int local);
EXTERNL int nczm_canonicalpath(const char* path, char** cpathp);
EXTERNL int nczm_basename(const char* path, char** basep);
EXTERNL int nczm_segment1(const char* path, char** seg1p);
EXTERNL int nczm_lastsegment(const char* path, char** lastp);
/* bubble sorts (note arguments) */
EXTERNL void nczm_sortlist(struct NClist* l);

View File

@ -25,12 +25,13 @@ static int define_subgrps(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* subg
static int searchvars(NCZ_FILE_INFO_T*, NC_GRP_INFO_T*, NClist*);
static int searchsubgrps(NCZ_FILE_INFO_T*, NC_GRP_INFO_T*, NClist*);
static int locategroup(NC_FILE_INFO_T* file, size_t nsegs, NClist* segments, NC_GRP_INFO_T** grpp);
static int parsedimrefs(NC_FILE_INFO_T*, NClist* dimrefs, NC_DIM_INFO_T** dims);
static int simulatedimrefs(NC_FILE_INFO_T* file, int rank, size64_t* shapes, NC_DIM_INFO_T** dims);
static int createdim(NC_FILE_INFO_T* file, const char* name, size64_t dimlen, NC_DIM_INFO_T** dimp);
static int parsedimrefs(NC_FILE_INFO_T*, NClist* dimnames, size64_t* shape, NC_DIM_INFO_T** dims, int create);
static int decodeints(NCjson* jshape, size64_t* shapes);
static int computeattrdata(nc_type* typeidp, NCjson* values, size_t* lenp, void** datap);
static int inferattrtype(NCjson* values, nc_type* typeidp);
static int mininttype(unsigned long long u64);
static int computedimrefs(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, int purezarr, int xarray, int ndims, NClist* dimnames, size64_t* shapes, NC_DIM_INFO_T** dims);
/**************************************************/
/**************************************************/
@ -173,7 +174,7 @@ ncz_sync_grp(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp)
goto done;
nullfree(key); key = NULL;
if(!(zinfo->features.flags & FLAG_PUREZARR)) {
if(!(zinfo->controls.flags & FLAG_PUREZARR)) {
/* Create the NCZGROUP json object */
if((stat = NCJnew(NCJ_DICT,&jgroup)))
goto done;
@ -192,7 +193,7 @@ ncz_sync_grp(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp)
goto done;
nullfree(key); key = NULL;
}
/* Build NCZATTR object and the .zattrs object */
/* Build NCZATTRS object and the .zattrs object */
assert(grp->att);
if((stat = ncz_sync_atts(file,(NC_OBJ*)grp, grp->att)))
goto done;
@ -238,13 +239,14 @@ ncz_sync_var(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var)
char* fullpath = NULL;
char* key = NULL;
char* dimpath = NULL;
NClist* dimrefs = NULL;
NCjson* jvar = NULL;
NCjson* jncvar = NULL;
NCjson* jdimrefs = NULL;
NCjson* jtmp = NULL;
size64_t shape[NC_MAX_VAR_DIMS];
NCZ_VAR_INFO_T* zvar = var->format_var_info;
LOG((3, "%s: dims: %s", __func__, key));
zinfo = file->format_file_info;
@ -376,19 +378,28 @@ ncz_sync_var(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var)
nullfree(key); key = NULL;
/* Build the NCZVAR object */
/* Capture dimref names as FQNs */
if(var->ndims > 0) {
if((dimrefs = nclistnew())==NULL) {stat = NC_ENOMEM; goto done;}
for(i=0;i<var->ndims;i++) {
NC_DIM_INFO_T* dim = var->dim[i];
if((stat = NCZ_dimkey(dim,&dimpath))) goto done;
nclistpush(dimrefs,dimpath);
dimpath = NULL;
}
}
/* Build the NCZARRAY object */
{
/* Create the dimrefs json object */
if((stat = NCJnew(NCJ_ARRAY,&jdimrefs)))
goto done;
/* Walk the dimensions and capture the fullpath names */
for(i=0;i<var->ndims;i++) {
NC_DIM_INFO_T* dim = var->dim[i];
if((stat = NCZ_dimkey(dim,&dimpath))) goto done;
NCJaddstring(jdimrefs,NCJ_STRING,dimpath);
nullfree(dimpath); dimpath = NULL;
for(i=0;i<nclistlength(dimrefs);i++) {
const char* dim = nclistget(dimrefs,i);
NCJaddstring(jdimrefs,NCJ_STRING,dim);
}
/* Create the NCZVAR json object */
/* Create the NCZARRAY json object */
if((stat = NCJnew(NCJ_DICT,&jncvar)))
goto done;
@ -411,9 +422,9 @@ ncz_sync_var(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var)
if((stat = NCJinsert(jncvar,"storage",jtmp))) goto done;
jtmp = NULL;
if(!(zinfo->features.flags & FLAG_PUREZARR)) {
/* Write out NCZVAR */
if((stat = nczm_concat(fullpath,NCZVAR,&key)))
if(!(zinfo->controls.flags & FLAG_PUREZARR)) {
/* Write out NCZARRAY */
if((stat = nczm_concat(fullpath,NCZARRAY,&key)))
goto done;
/* Write to map */
if((stat=NCZ_uploadjson(map,key,jncvar)))
@ -421,7 +432,8 @@ ncz_sync_var(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var)
nullfree(key); key = NULL;
}
}
/* Build .zattrs object including _nczattr object */
/* Build .zattrs object including .nczattrs object */
assert(var->att);
if((stat = ncz_sync_atts(file,(NC_OBJ*)var, var->att)))
goto done;
@ -433,6 +445,7 @@ ncz_sync_var(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var)
}
done:
nclistfreeall(dimrefs);
nullfree(fullpath);
nullfree(key);
nullfree(dimpath);
@ -540,46 +553,55 @@ ncz_sync_atts(NC_FILE_INFO_T* file, NC_OBJ* container, NCindex* attlist)
NCjson* jtype = NULL;
NCjson* jnczarr = NULL;
NCjson* jnczattr = NULL;
NCjson* jdimrefs = NULL;
NCZMAP* map = NULL;
char* fullpath = NULL;
char* key = NULL;
char* content = NULL;
char* dimpath = NULL;
NCjson* jname = NULL;
int isxarray = 0;
LOG((3, "%s", __func__));
zinfo = file->format_file_info;
map = zinfo->map;
if(ncindexsize(attlist) == 0)
if(zinfo->controls.flags & FLAG_XARRAYDIMS) isxarray = 1;
if(!isxarray && ncindexsize(attlist) == 0)
goto done; /* do nothing */
/* Create the jncattr object */
if((stat = NCJnew(NCJ_DICT,&jnczattr)))
goto done;
/* Create the jncattr.types object */
if((stat = NCJnew(NCJ_DICT,&jtypes)))
goto done;
/* Walk all the attributes and collect the types by attribute name */
for(i=0;i<ncindexsize(attlist);i++) {
NC_ATT_INFO_T* a = (NC_ATT_INFO_T*)ncindexith(attlist,i);
char tname[1024];
if(a->nc_typeid >= NC_STRING)
{stat = THROW(NC_ENCZARR); goto done;}
snprintf(tname,sizeof(tname),"%u",a->nc_typeid);
if((stat = NCJnewstring(NCJ_STRING,tname,&jtype)))
if(ncindexsize(attlist) > 0) {
/* Create the jncattr.types object */
if((stat = NCJnew(NCJ_DICT,&jtypes)))
goto done;
if((stat = NCJinsert(jtypes,a->hdr.name,jtype))) /* add {name: type} */
/* Walk all the attributes and collect the types by attribute name */
for(i=0;i<ncindexsize(attlist);i++) {
NC_ATT_INFO_T* a = (NC_ATT_INFO_T*)ncindexith(attlist,i);
char tname[1024];
const NC_reservedatt* ra = NC_findreserved(a->hdr.name);
/* If reserved and hidden, then ignore */
if(ra && (ra->flags & HIDDENATTRFLAG)) continue;
if(a->nc_typeid >= NC_STRING)
{stat = THROW(NC_ENCZARR); goto done;}
snprintf(tname,sizeof(tname),"%u",a->nc_typeid);
if((stat = NCJnewstring(NCJ_STRING,tname,&jtype)))
goto done;
if((stat = NCJinsert(jtypes,a->hdr.name,jtype))) /* add {name: type} */
goto done;
jtype = NULL;
}
/* Insert into the NCZATTRS json */
if((stat = NCJinsert(jnczattr,"types",jtypes)))
goto done;
jtype = NULL;
jtypes = NULL;
}
/* Insert into the NCZATTR json */
if((stat = NCJinsert(jnczattr,"types",jtypes)))
goto done;
jtypes = NULL;
/* Construct container path */
if(container->sort == NCGRP)
stat = NCZ_grpkey((NC_GRP_INFO_T*)container,&fullpath);
@ -588,10 +610,10 @@ ncz_sync_atts(NC_FILE_INFO_T* file, NC_OBJ* container, NCindex* attlist)
if(stat)
goto done;
/* Upload the NCZATTR object */
if((stat = nczm_concat(fullpath,NCZATTR,&key)))
/* Upload the NCZATTRS object */
if((stat = nczm_concat(fullpath,NCZATTRS,&key)))
goto done;
if(!(zinfo->features.flags & FLAG_PUREZARR)) {
if(!(zinfo->controls.flags & FLAG_PUREZARR)) {
/* Write to map */
if((stat=NCZ_uploadjson(map,key,jnczattr)))
goto done;
@ -601,6 +623,25 @@ ncz_sync_atts(NC_FILE_INFO_T* file, NC_OBJ* container, NCindex* attlist)
/* Jsonize the attribute list */
if((stat = ncz_jsonize_atts(attlist,&jatts)))
goto done;
if(container->sort == NCVAR) {
if(isxarray) {
NC_VAR_INFO_T* var = (NC_VAR_INFO_T*)container;
/* Insert the XArray _ARRAY_DIMENSIONS attribute */
if((stat = NCJnew(NCJ_ARRAY,&jdimrefs)))
goto done;
/* Walk the dimensions and capture the names */
for(i=0;i<var->ndims;i++) {
NC_DIM_INFO_T* dim = var->dim[i];
char* dimname = strdup(dim->hdr.name);
if(dimname == NULL) {stat = NC_ENOMEM; goto done;}
NCJaddstring(jdimrefs,NCJ_STRING,dimname);
nullfree(dimname); dimname = NULL;
}
/* Add the _ARRAY_DIMENSIONS attribute */
if((stat = NCJinsert(jatts,NC_XARRAY_DIMS,jdimrefs))) goto done;
jdimrefs = NULL;
}
}
/* write .zattrs path */
if((stat = nczm_concat(fullpath,ZATTRS,&key)))
@ -619,6 +660,7 @@ done:
NCJreclaim(jtypes);
NCJreclaim(jtype);
NCJreclaim(jnczarr);
NCJreclaim(jdimrefs);
NCJreclaim(jnczattr);
NCJreclaim(jname);
return THROW(stat);
@ -715,31 +757,39 @@ load_jatts(NCZMAP* map, NC_OBJ* container, NCjson** jattrsp, NClist** atypesp)
nullfree(key); key = NULL;
if(jattrs != NULL) {
/* Construct the path to the NCZATTR object */
if((stat = nczm_concat(fullpath,NCZATTR,&key))) goto done;
/* Download the NCZATTR object: may not exist if pure zarr */
switch((stat=NCZ_downloadjson(map,key,&jncattr))) {
/* Construct the path to the NCZATTRS object */
if((stat = nczm_concat(fullpath,NCZATTRS,&key))) goto done;
/* Download the NCZATTRS object: may not exist if pure zarr or using deprecated name */
stat=NCZ_downloadjson(map,key,&jncattr);
if(stat == NC_EEMPTY) {
/* try deprecated name */
nullfree(key); key = NULL;
if((stat = nczm_concat(fullpath,NCZATTRDEP,&key))) goto done;
stat=NCZ_downloadjson(map,key,&jncattr);
}
nullfree(key); key = NULL;
switch (stat) {
case NC_NOERR: break;
case NC_EEMPTY: stat = NC_NOERR; jncattr = NULL; break;
default: goto done; /* failure */
}
nullfree(key); key = NULL;
if(jncattr != NULL) {
NCjson* jtypes = NULL;
/* jncattr attribute should be a dict */
if(jncattr->sort != NCJ_DICT) {stat = THROW(NC_ENCZARR); goto done;}
/* Extract "types */
/* Extract "types"; may not exist if only hidden attributes are defined */
if((stat = NCJdictget(jncattr,"types",&jtypes))) goto done;
if(jtypes->sort != NCJ_DICT) {stat = THROW(NC_ENCZARR); goto done;}
/* Convert to an envv list */
for(i=0;i<nclistlength(jtypes->contents);i+=2) {
const NCjson* key = nclistget(jtypes->contents,i);
const NCjson* value = nclistget(jtypes->contents,i+1);
if(key->sort != NCJ_STRING) {stat = THROW(NC_ENCZARR); goto done;}
if(value->sort != NCJ_STRING) {stat = THROW(NC_ENCZARR); goto done;}
nclistpush(atypes,strdup(key->value));
nclistpush(atypes,strdup(value->value));
if(jtypes != NULL) {
if(jtypes->sort != NCJ_DICT) {stat = THROW(NC_ENCZARR); goto done;}
/* Convert to an envv list */
for(i=0;i<nclistlength(jtypes->contents);i+=2) {
const NCjson* key = nclistget(jtypes->contents,i);
const NCjson* value = nclistget(jtypes->contents,i+1);
if(key->sort != NCJ_STRING) {stat = THROW(NC_ENCZARR); goto done;}
if(value->sort != NCJ_STRING) {stat = THROW(NC_ENCZARR); goto done;}
nclistpush(atypes,strdup(key->value));
nclistpush(atypes,strdup(value->value));
}
}
}
}
@ -994,7 +1044,7 @@ define_grp(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp)
NClist* dimdefs = nclistnew();
NClist* varnames = nclistnew();
NClist* subgrps = nclistnew();
int nodimrefs = 0;
int purezarr = 0;
LOG((3, "%s: dims: %s", __func__, key));
@ -1005,11 +1055,11 @@ define_grp(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp)
if((stat = NCZ_grpkey(grp,&fullpath)))
goto done;
if(zinfo->features.flags & FLAG_PUREZARR) {
if(zinfo->controls.flags & FLAG_PUREZARR) {
if((stat = parse_group_content_pure(zinfo,grp,varnames,subgrps)))
goto done;
nodimrefs = 1;
} else { /*!(zinfo->features.flags & FLAG_PUREZARR) */
purezarr = 1;
} else { /*!purezarr*/
/* build NCZGROUP path */
if((stat = nczm_concat(fullpath,NCZGROUP,&key)))
goto done;
@ -1023,19 +1073,19 @@ define_grp(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp)
case NC_EEMPTY: /* probably pure zarr, so does not exist, use search */
if((stat = parse_group_content_pure(zinfo,grp,varnames,subgrps)))
goto done;
nodimrefs = 1;
purezarr = 1;
break;
default: goto done;
}
nullfree(key); key = NULL;
}
if(!nodimrefs) {
if(!purezarr) {
/* Define dimensions */
if((stat = define_dims(file,grp,dimdefs))) goto done;
}
/* Define vars */
/* Define vars taking xarray into account */
if((stat = define_vars(file,grp,varnames))) goto done;
/* Define sub-groups */
@ -1098,7 +1148,7 @@ ncz_read_atts(NC_FILE_INFO_T* file, NC_OBJ* container)
if(jattrs != NULL) {
/* Iterate over the attributes to create the in-memory attributes */
/* Watch for reading _FillValue */
/* Watch for reading _FillValue and possibly _ARRAY_DIMENSIONS (xarray) */
for(i=0;i<nclistlength(jattrs->contents);i+=2) {
NCjson* key = nclistget(jattrs->contents,i);
NCjson* value = nclistget(jattrs->contents,i+1);
@ -1107,20 +1157,34 @@ ncz_read_atts(NC_FILE_INFO_T* file, NC_OBJ* container)
/* See if this is reserved attribute */
ra = NC_findreserved(key->value);
if(ra != NULL) {
/* case 1: grp=root, varid==NC_GLOBAL, flags & READONLYFLAG */
if(container->sort == NCGRP
&& file->root_grp == (NC_GRP_INFO_T*)container) {
/* This is the real thing */
if(strcmp(key->value,NCPROPS)==0) {
/* Setup provenance */
if(value->sort != NCJ_STRING)
{stat = THROW(NC_ENCZARR); goto done;} /*malformed*/
if((stat = NCZ_read_provenance(file,key->value,value->value)))
goto done;
}
/* Otherwise ignore the reserved attribute */
continue;
/* case 1: name = _NCProperties, grp=root, varid==NC_GLOBAL, flags & READONLYFLAG */
if(strcmp(key->value,NCPROPS)==0
&& container->sort == NCGRP
&& file->root_grp == (NC_GRP_INFO_T*)container) {
/* Setup provenance */
if(value->sort != NCJ_STRING)
{stat = THROW(NC_ENCZARR); goto done;} /*malformed*/
if((stat = NCZ_read_provenance(file,key->value,value->value)))
goto done;
}
/* case 2: name = _ARRAY_DIMENSIONS, sort==NCVAR, flags & HIDDENATTRFLAG */
if(strcmp(key->value,NC_XARRAY_DIMS)==0
&& container->sort == NCVAR
&& (ra->flags & HIDDENATTRFLAG)) {
/* store for later */
NCZ_VAR_INFO_T* zvar = (NCZ_VAR_INFO_T*)((NC_VAR_INFO_T*)container)->format_var_info;
int i;
assert(value->sort == NCJ_ARRAY);
if((zvar->xarray = nclistnew())==NULL)
{stat = NC_ENOMEM; goto done;}
for(i=0;i<nclistlength(value->contents);i++) {
const NCjson* k = nclistget(value->contents,i);
assert(k != NULL && k->sort == NCJ_STRING);
nclistpush(zvar->xarray,strdup(k->value));
}
}
/* else ignore */
continue;
}
/* Create the attribute */
/* Collect the attribute's type and value */
@ -1191,7 +1255,8 @@ done:
}
/**
* @internal Materialize vars into memory
* @internal Materialize vars into memory;
* Take xarray and purezarr into account.
*
* @param file Pointer to file info struct.
* @param grp Pointer to grp info struct.
@ -1214,13 +1279,19 @@ define_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames)
NCjson* jncvar = NULL;
NCjson* jdimrefs = NULL;
NCjson* jvalue = NULL;
NClist* dimrefs = NULL;
int hasdimrefs;
int purezarr = 0;
int xarray = 0;
nc_type typeid;
size64_t* shapes = NULL;
int rank = 0;
NClist* dimnames = nclistnew();
zinfo = file->format_file_info;
map = zinfo->map;
if(zinfo->controls.flags & FLAG_PUREZARR) purezarr = 1;
if(zinfo->controls.flags & FLAG_XARRAYDIMS) {purezarr = 1; xarray = 1;}
/* Load each var in turn */
for(i = 0; i < nclistlength(varnames); i++) {
NC_VAR_INFO_T* var;
@ -1252,53 +1323,8 @@ define_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames)
nullfree(key); key = NULL;
assert((jvar->sort == NCJ_DICT));
if(zinfo->features.flags & FLAG_PUREZARR) {
var->storage = NC_CHUNKED;
hasdimrefs = 0;
} else { /*zinfo->features.flags & FLAG_PUREZARR*/
/* Download the NCZVAR object */
if((stat = nczm_concat(varpath,NCZVAR,&key))) goto done;
if((stat=NCZ_readdict(map,key,&jncvar))) goto done;
nullfree(key); key = NULL;
assert((jncvar->sort == NCJ_DICT));
/* Extract storage flag */
if((stat = NCJdictget(jncvar,"storage",&jvalue)))
goto done;
if(jvalue != NULL) {
if(strcmp(jvalue->value,"chunked") == 0) {
var->storage = NC_CHUNKED;
} else if(strcmp(jvalue->value,"compact") == 0) {
var->storage = NC_COMPACT;
} else if(strcmp(jvalue->value,"scalar") == 0) {
var->storage = NC_CONTIGUOUS;
zvar->scalar = 1;
} else { /*storage = NC_CONTIGUOUS;*/
var->storage = NC_CONTIGUOUS;
}
}
/* Extract dimrefs list */
switch ((stat = NCJdictget(jncvar,"dimrefs",&jdimrefs))) {
case NC_NOERR: /* Extract the dimref names */
assert((jdimrefs->sort == NCJ_ARRAY));
hasdimrefs = 0; /* until we have one */
dimrefs = nclistnew();
for(j=0;j<nclistlength(jdimrefs->contents);j++) {
const NCjson* dimpath = nclistget(jdimrefs->contents,j);
assert(dimpath->sort == NCJ_STRING);
nclistpush(dimrefs,strdup(dimpath->value));
hasdimrefs = 1;
}
break;
case NC_EEMPTY: /* simulate it from the shape of the variable */
stat = NC_NOERR;
hasdimrefs = 0;
break;
default: goto done;
}
jdimrefs = NULL;
}
/* Extract the .zarray info from jvar */
/* Use jvar to set up the rest of the NC_VAR_INFO_T object */
/* Verify the format */
{
int version;
@ -1327,32 +1353,16 @@ define_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames)
}
/* shape */
{
int rank;
if((stat = NCJdictget(jvar,"shape",&jvalue))) goto done;
if(jvalue->sort != NCJ_ARRAY) {stat = THROW(NC_ENCZARR); goto done;}
/* Verify the rank */
if(zvar->scalar)
rank = 0;
else
rank = nclistlength(jvalue->contents);
if(hasdimrefs) { /* verify rank consistency */
if(nclistlength(dimrefs) != rank)
{stat = THROW(NC_ENCZARR); goto done;}
}
if(zvar->scalar) rank = 0; else rank = nclistlength(jvalue->contents);
/* Set the rank of the variable */
if((stat = nc4_var_set_ndims(var, rank))) goto done;
if(hasdimrefs) {
if((stat = parsedimrefs(file, dimrefs, var->dim)))
goto done;
} else { /* simulate the dimrefs */
size64_t shapes[NC_MAX_VAR_DIMS];
if((stat = decodeints(jvalue, shapes))) goto done;
if((stat = simulatedimrefs(file, rank, shapes, var->dim)))
goto done;
}
/* fill in the dimids */
for(j=0;j<rank;j++)
var->dimids[j] = var->dim[j]->hdr.id;
/* extract the shapes */
if((shapes = (size64_t*)malloc(sizeof(size64_t)*rank)) == NULL)
{stat = THROW(NC_ENOMEM); goto done;}
if((stat = decodeints(jvalue, shapes))) goto done;
}
/* chunks */
{
@ -1373,8 +1383,7 @@ define_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames)
/* validate the chunk sizes */
zvar->chunkproduct = 1;
for(j=0;j<rank;j++) {
NC_DIM_INFO_T* d = var->dim[j]; /* matching dim */
if(chunks[j] == 0 || chunks[j] > d->len)
if(chunks[j] == 0 || chunks[j] > shapes[j])
{stat = THROW(NC_ENCZARR); goto done;}
var->chunksizes[j] = (size_t)chunks[j];
zvar->chunkproduct *= chunks[j];
@ -1418,17 +1427,75 @@ define_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames)
if((stat = NCJdictget(jvar,"filters",&jvalue))) goto done;
/* ignore */
}
if(!purezarr) {
/* Download the NCZARRAY object */
if((stat = nczm_concat(varpath,NCZARRAY,&key)))
goto done;
if((stat=NCZ_readdict(map,key,&jncvar))) {
nullfree(key); key = NULL;
if((stat = nczm_concat(varpath,NCZVARDEP,&key))) /* try deprecated name */
goto done;
if((stat=NCZ_readdict(map,key,&jncvar)))
goto done;
}
nullfree(key); key = NULL;
assert((jncvar->sort == NCJ_DICT));
/* Extract storage flag */
if((stat = NCJdictget(jncvar,"storage",&jvalue)))
goto done;
if(jvalue != NULL) {
if(strcmp(jvalue->value,"chunked") == 0) {
var->storage = NC_CHUNKED;
} else if(strcmp(jvalue->value,"compact") == 0) {
var->storage = NC_COMPACT;
} else if(strcmp(jvalue->value,"scalar") == 0) {
var->storage = NC_CONTIGUOUS;
zvar->scalar = 1;
} else { /*storage = NC_CONTIGUOUS;*/
var->storage = NC_CONTIGUOUS;
}
}
/* Extract dimnames list */
switch ((stat = NCJdictget(jncvar,"dimrefs",&jdimrefs))) {
case NC_NOERR: /* Extract the dimref names */
assert((jdimrefs->sort == NCJ_ARRAY));
assert(nclistlength(jdimrefs->contents) == rank);
for(j=0;j<rank;j++) {
const NCjson* dimpath = nclistget(jdimrefs->contents,j);
assert(dimpath->sort == NCJ_STRING);
nclistpush(dimnames,strdup(dimpath->value));
}
jdimrefs = NULL; /* avoid double free */
break;
case NC_EEMPTY: /* will simulate it from the shape of the variable */
stat = NC_NOERR;
break;
default: goto done;
}
jdimrefs = NULL;
}
if((stat = computedimrefs(file, var, purezarr, xarray, rank, dimnames, shapes, var->dim)))
goto done;
/* Extract the dimids */
for(j=0;j<rank;j++)
var->dimids[j] = var->dim[j]->hdr.id;
/* Clean up from last cycle */
nclistfreeall(dimrefs); dimrefs = NULL;
nclistfreeall(dimnames); dimnames = nclistnew();
nullfree(varpath); varpath = NULL;
nullfree(shapes); shapes = NULL;
NCJreclaim(jvar); jvar = NULL;
NCJreclaim(jncvar); jncvar = NULL;
}
done:
nullfree(shapes);
nullfree(varpath);
nullfree(key);
nclistfreeall(dimrefs);
nclistfreeall(dimnames);
NCJreclaim(jvar);
NCJreclaim(jncvar);
return THROW(stat);
@ -1651,7 +1718,7 @@ done:
return stat;
}
/* Convert a list of integer strings to integers */
/* Convert a list of integer strings to 64 bit integers */
static int
decodeints(NCjson* jshape, size64_t* shapes)
{
@ -1669,50 +1736,26 @@ done:
return THROW(stat);
}
/* This code is a subset of NCZ_def_dim */
static int
simulatedimrefs(NC_FILE_INFO_T* file, int rank, size64_t* shapes, NC_DIM_INFO_T** dims)
createdim(NC_FILE_INFO_T* file, const char* name, size64_t dimlen, NC_DIM_INFO_T** dimp)
{
int i, j, stat = NC_NOERR;
int stat = NC_NOERR;
NC_GRP_INFO_T* root = file->root_grp;
NC_DIM_INFO_T* thed = NULL;
int match = 0;
for(i=0;i<rank;i++) {
size64_t dimlen = shapes[i];
char shapename[NC_MAX_NAME];
match = 0;
/* See if there is a dimension named ".zdim<dimlen>", if not create */
snprintf(shapename,sizeof(shapename),".zdim%llu",dimlen);
for(j=0;j<ncindexsize(root->dim);j++) {
thed = (NC_DIM_INFO_T*)ncindexith(root->dim,j);
if(strcmp(thed->hdr.name,shapename)==0) {
if(dimlen != (size64_t)thed->len)
{stat = THROW(NC_ENCZARR); goto done;} /* we have a problem */
match = 1;
break;
}
}
if(!match) { /* create the dimension */
/* This code is a subset of NCZ_def_dim */
thed = (NC_DIM_INFO_T*)ncindexlookup(root->dim,shapename);
if(thed == NULL) { /*create the dim*/
if ((stat = nc4_dim_list_add(root, shapename, (size_t)dimlen, -1, &thed)))
goto done;
/* Create struct for NCZ-specific dim info. */
if (!(thed->format_dim_info = calloc(1, sizeof(NCZ_DIM_INFO_T))))
{stat = NC_ENOMEM; goto done;}
((NCZ_DIM_INFO_T*)thed->format_dim_info)->common.file = file;
}
assert(thed != NULL);
}
/* Save the id */
dims[i] = thed;
}
if((stat = nc4_dim_list_add(root, name, (size_t)dimlen, -1, &thed)))
goto done;
assert(thed != NULL);
/* Create struct for NCZ-specific dim info. */
if (!(thed->format_dim_info = calloc(1, sizeof(NCZ_DIM_INFO_T))))
{stat = NC_ENOMEM; goto done;}
((NCZ_DIM_INFO_T*)thed->format_dim_info)->common.file = file;
*dimp = thed; thed = NULL;
done:
return THROW(stat);
return stat;
}
/*
Given a list of segments, find corresponding group.
*/
@ -1746,19 +1789,19 @@ done:
}
static int
parsedimrefs(NC_FILE_INFO_T* file, NClist* dimrefs, NC_DIM_INFO_T** dims)
parsedimrefs(NC_FILE_INFO_T* file, NClist* dimnames, size64_t* shape, NC_DIM_INFO_T** dims, int create)
{
int i, stat = NC_NOERR;
NClist* segments = NULL;
for(i=0;i<nclistlength(dimrefs);i++) {
for(i=0;i<nclistlength(dimnames);i++) {
NC_GRP_INFO_T* g = NULL;
NC_DIM_INFO_T* d = NULL;
int j;
const char* dimpath = nclistget(dimrefs,i);
const char* dimpath = nclistget(dimnames,i);
const char* dimname = NULL;
/* Locate the corresponding NC_DIM_INFO_T* object */
/* Clear the list */
nclistfreeall(segments);
segments = nclistnew();
if((stat = ncz_splitkey(dimpath,segments)))
@ -1768,13 +1811,21 @@ parsedimrefs(NC_FILE_INFO_T* file, NClist* dimrefs, NC_DIM_INFO_T** dims)
/* Lookup the dimension */
dimname = nclistget(segments,nclistlength(segments)-1);
d = NULL;
dims[i] = NULL;
for(j=0;j<ncindexsize(g->dim);j++) {
d = (NC_DIM_INFO_T*)ncindexith(g->dim,j);
if(strcmp(d->hdr.name,dimname)==0) {
dims[i] = d;/* match */
dims[i] = d;
break;
}
}
if(dims[i] == NULL && create) {
/* If not found and create then create it */
if((stat = createdim(file, dimname, shape[i], &dims[i])))
goto done;
}
assert(dims[i] != NULL);
assert(dims[i]->len == shape[i]);
}
done:
nclistfreeall(segments);
@ -1877,7 +1928,7 @@ ncz_create_superblock(NCZ_FILE_INFO_T* zinfo)
zinfo->zarr.nczarr_version.release);
if((stat = NCJaddstring(json,NCJ_STRING,ver))) goto done;
}
if(!(zinfo->features.flags & FLAG_PUREZARR)) {
if(!(zinfo->controls.flags & FLAG_PUREZARR)) {
/* Write back to map */
if((stat=NCZ_uploadjson(map,NCZMETAROOT,json)))
goto done;
@ -1886,3 +1937,54 @@ done:
NCJreclaim(json);
return ZUNTRACE(stat);
}
/*
Compute the set of dimension references for a variable, taking the
purezarr and xarray modes into account.

@param file      the containing file
@param var       the variable whose dimensions are being resolved
@param purezarr  non-zero if the dataset is pure Zarr (no NCZarr metadata)
@param xarray    non-zero if the xarray convention is enabled; implies purezarr
@param ndims     rank of the variable; also the length of shapes and dims
@param dimnames  list of dimension paths; any names already present are
                 used as-is, otherwise names are appended here (the list
                 owns the strdup'd strings that are pushed onto it)
@param shapes    the ndims dimension lengths of the variable
@param dims      output vector of ndims dimension objects
@return NC_NOERR on success; NC_ENOMEM if a name cannot be duplicated;
        NC_ENCZARR if the number of dimension names does not match the
        rank; otherwise whatever ncz_read_atts or parsedimrefs reports.
*/
static int
computedimrefs(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, int purezarr, int xarray, int ndims, NClist* dimnames, size64_t* shapes, NC_DIM_INFO_T** dims)
{
    int stat = NC_NOERR;
    int i;
    int createdims = 0; /* 1 => we need to create the dims in root if they do not already exist */
    char zdimname[4096];
    char* dup = NULL;
    NCZ_FILE_INFO_T* zfile = (NCZ_FILE_INFO_T*)file->format_file_info;
    NCZ_VAR_INFO_T* zvar = (NCZ_VAR_INFO_T*)(var->format_var_info);

    assert(zfile && zvar);
    /* xarray => purezarr */
    assert(!xarray || purezarr);

    if(xarray) {/* Read in the attributes to get the xarray dimension-name attribute */
        if(zvar->xarray == NULL) {
            assert(nclistlength(dimnames) == 0);
            /* presumably fills in zvar->xarray when the attribute is present */
            if((stat = ncz_read_atts(file,(NC_OBJ*)var))) goto done;
        }
        /* The attribute is optional in the stored data, so its absence is
           not an internal error; fall through to the simulated names below. */
        if(zvar->xarray != NULL) {
            /* convert the xarray names to absolute paths in the root group */
            for(i=0;i<nclistlength(zvar->xarray);i++) {
                snprintf(zdimname,sizeof(zdimname),"/%s",(const char*)nclistget(zvar->xarray,i));
                if((dup = strdup(zdimname)) == NULL)
                    {stat = NC_ENOMEM; goto done;}
                nclistpush(dimnames,dup);
            }
        }
        createdims = 1; /* may need to create them */
    }

    /* If pure zarr and we have no dimref names, then fake it */
    if(purezarr && nclistlength(dimnames) == 0) {
        createdims = 1;
        for(i=0;i<ndims;i++) {
            /* Simulate a root-level dimension named after its length */
            snprintf(zdimname,sizeof(zdimname),"/.zdim_%llu",(unsigned long long)shapes[i]);
            if((dup = strdup(zdimname)) == NULL)
                {stat = NC_ENOMEM; goto done;}
            nclistpush(dimnames,dup);
        }
    }

    /* parsedimrefs indexes shapes[] and dims[] once per name, so the name
       count must equal the rank; otherwise the metadata is inconsistent. */
    if(nclistlength(dimnames) != (size_t)ndims)
        {stat = THROW(NC_ENCZARR); goto done;}

    /* Now, use dimnames to get the dims; create if necessary */
    if((stat = parsedimrefs(file,dimnames,shapes,dims,createdims)))
        goto done;

done:
    return THROW(stat);
}

View File

@ -32,23 +32,23 @@
* across all possible dispatchers
*/
#define NRESERVED 11 /*|NC_reservedatt|*/
/** @internal List of reserved attributes. This list must be in sorted
* order for binary search. */
static const NC_reservedatt NC_reserved[NRESERVED] = {
{NC_ATT_CLASS, READONLYFLAG|DIMSCALEFLAG}, /*CLASS*/
{NC_ATT_DIMENSION_LIST, READONLYFLAG|DIMSCALEFLAG}, /*DIMENSION_LIST*/
{NC_ATT_NAME, READONLYFLAG|DIMSCALEFLAG}, /*NAME*/
{NC_ATT_REFERENCE_LIST, READONLYFLAG|DIMSCALEFLAG}, /*REFERENCE_LIST*/
{NC_ATT_FORMAT, READONLYFLAG}, /*_Format*/
{ISNETCDF4ATT, READONLYFLAG|NAMEONLYFLAG}, /*_IsNetcdf4*/
{NCPROPS, READONLYFLAG|NAMEONLYFLAG|MATERIALIZEDFLAG},/*_NCProperties*/
{NC_ATT_COORDINATES, READONLYFLAG|DIMSCALEFLAG|MATERIALIZEDFLAG},/*_Netcdf4Coordinates*/
{NC_ATT_DIMID_NAME, READONLYFLAG|DIMSCALEFLAG|MATERIALIZEDFLAG},/*_Netcdf4Dimid*/
{SUPERBLOCKATT, READONLYFLAG|NAMEONLYFLAG},/*_SuperblockVersion*/
{NC_ATT_NC3_STRICT_NAME, READONLYFLAG|MATERIALIZEDFLAG}, /*_nc3_strict*/
/** @internal List of reserved attributes.
WARNING: This list must be in sorted order for binary search. */
static const NC_reservedatt NC_reserved[] = {
{NC_ATT_CLASS, READONLYFLAG|HIDDENATTRFLAG}, /*CLASS*/
{NC_ATT_DIMENSION_LIST, READONLYFLAG|HIDDENATTRFLAG}, /*DIMENSION_LIST*/
{NC_ATT_NAME, READONLYFLAG|HIDDENATTRFLAG}, /*NAME*/
{NC_ATT_REFERENCE_LIST, READONLYFLAG|HIDDENATTRFLAG}, /*REFERENCE_LIST*/
{NC_XARRAY_DIMS, READONLYFLAG|HIDDENATTRFLAG}, /*_ARRAY_DIMENSIONS*/
{NC_ATT_FORMAT, READONLYFLAG}, /*_Format*/
{ISNETCDF4ATT, READONLYFLAG|NAMEONLYFLAG}, /*_IsNetcdf4*/
{NCPROPS, READONLYFLAG|NAMEONLYFLAG|MATERIALIZEDFLAG}, /*_NCProperties*/
{NC_ATT_COORDINATES, READONLYFLAG|HIDDENATTRFLAG|MATERIALIZEDFLAG}, /*_Netcdf4Coordinates*/
{NC_ATT_DIMID_NAME, READONLYFLAG|HIDDENATTRFLAG|MATERIALIZEDFLAG}, /*_Netcdf4Dimid*/
{SUPERBLOCKATT, READONLYFLAG|NAMEONLYFLAG}, /*_SuperblockVersion*/
{NC_ATT_NC3_STRICT_NAME, READONLYFLAG|MATERIALIZEDFLAG}, /*_nc3_strict*/
};
#define NRESERVED (sizeof(NC_reserved) / sizeof(NC_reservedatt)) /*|NC_reservedatt|*/
/* These hold the file caching settings for the library. */
size_t nc4_chunk_cache_size = CHUNK_CACHE_SIZE; /**< Default chunk cache size. */

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -1,9 +1,8 @@
/* A Bison parser, made by GNU Bison 3.5.1. */
/* A Bison parser, made by GNU Bison 3.0.4. */
/* Bison interface for Yacc-like parsers in C
Copyright (C) 1984, 1989-1990, 2000-2015, 2018-2020 Free Software Foundation,
Inc.
Copyright (C) 1984, 1989-1990, 2000-2015 Free Software Foundation, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
@ -31,9 +30,6 @@
This special exception was added by the Free Software Foundation in
version 2.2 of Bison. */
/* Undocumented macros, especially those whose name start with YY_,
are private implementation details. Do not rely on them. */
#ifndef YY_NCG_NCGEN_TAB_H_INCLUDED
# define YY_NCG_NCGEN_TAB_H_INCLUDED
/* Debug traces. */
@ -107,9 +103,10 @@ extern int ncgdebug;
/* Value type. */
#if ! defined YYSTYPE && ! defined YYSTYPE_IS_DECLARED
union YYSTYPE
{
#line 151 "ncgen.y"
#line 151 "ncgen.y" /* yacc.c:1909 */
Symbol* sym;
unsigned long size; /* allow for zero size to indicate e.g. UNLIMITED*/
@ -118,9 +115,9 @@ int nctype; /* for tracking attribute list type*/
Datalist* datalist;
NCConstant* constant;
#line 122 "ncgen.tab.h"
#line 119 "ncgeny.h" /* yacc.c:1909 */
};
typedef union YYSTYPE YYSTYPE;
# define YYSTYPE_IS_TRIVIAL 1
# define YYSTYPE_IS_DECLARED 1

View File

@ -82,6 +82,7 @@ IF(ENABLE_TESTS)
add_sh_test(nczarr_test run_fillonlyz)
ENDIF()
add_sh_test(nczarr_test run_ncgen4)
add_sh_test(nczarr_test run_purezarr)
BUILD_BIN_TEST(tst_chunkcases ${TSTCOMMONSRC})
TARGET_INCLUDE_DIRECTORIES(tst_chunkcases PUBLIC ../libnczarr)

View File

@ -50,6 +50,7 @@ TESTS += run_fillonlyz.sh
endif
TESTS += run_ncgen4.sh
TESTS += run_purezarr.sh
check_PROGRAMS += tst_chunkcases
tst_chunkcases_SOURCES = tst_chunkcases.c ${tstcommonsrc}
@ -97,6 +98,7 @@ ncdumpchunks_SOURCES = ncdumpchunks.c
EXTRA_DIST = CMakeLists.txt \
run_ut_map.sh run_ut_mapapi.sh run_ut_misc.sh run_ut_chunk.sh run_ncgen4.sh \
run_nccopyz.sh run_fillonlyz.sh run_chunkcases.sh test_nczarr.sh run_perf_chunks1.sh run_s3_cleanup.sh \
run_purezarr.sh \
ref_ut_map_create.cdl ref_ut_map_writedata.cdl ref_ut_map_writemeta2.cdl ref_ut_map_writemeta.cdl \
ref_ut_map_readmeta.txt ref_ut_map_readmeta2.txt ref_ut_map_search.txt \
ref_ut_mapapi_create.cdl ref_ut_mapapi_data.cdl ref_ut_mapapi_meta.cdl ref_ut_mapapi_search.txt \
@ -108,7 +110,8 @@ ref_whole.cdl ref_whole.txt \
ref_skip.cdl ref_skip.txt ref_skipw.cdl \
ref_rem.cdl ref_rem.dmp ref_ndims.cdl ref_ndims.dmp \
ref_misc1.cdl ref_misc1.dmp \
ref_avail1.cdl ref_avail1.dmp ref_avail1.txt
ref_avail1.cdl ref_avail1.dmp ref_avail1.txt \
ref_xarray.cdl ref_purezarr.cdl ref_purezarr_base.cdl
CLEANFILES = ut_*.txt ut*.cdl tmp*.nc tmp*.cdl tmp*.txt tmp*.dmp tmp*.zip tmp*.nc

View File

@ -0,0 +1,12 @@
netcdf tmp_purezarr {
dimensions:
.zdim_2 = 2 ;
.zdim_5 = 5 ;
variables:
int i(.zdim_2, .zdim_5) ;
data:
i =
_, _, _, _, _,
_, _, _, _, _ ;
}

View File

@ -0,0 +1,7 @@
netcdf ref_purezarr {
dimensions:
x = 2;
y = 5;
variables:
int i(x,y) ;
}

View File

@ -1,4 +1,4 @@
/meta2/.nczvar: |{
/meta2/.nczarray: |{
"foo": 42,
"bar": "apples",
"baz": [1, 2, 3, 4],

View File

@ -5,4 +5,4 @@
[4] /meta1
[5] /meta1/.zarray
[6] /meta2
[7] /meta2/.nczvar
[7] /meta2/.nczarray

View File

@ -4,7 +4,7 @@
"foo": 42,
"bar": "apples",
"baz": [1, 2, 3, 4]}|
[6] /meta2/.nczvar : (64) |{
[6] /meta2/.nczarray : (64) |{
"foo": 42,
"bar": "apples",
"baz": [1, 2, 3, 4],

View File

@ -3,7 +3,7 @@
"foo": 42,
"bar": "apples",
"baz": [1, 2, 3, 4]}|
[4] /meta2/.nczvar : (64) |{
[4] /meta2/.nczarray : (64) |{
"foo": 42,
"bar": "apples",
"baz": [1, 2, 3, 4],

View File

@ -0,0 +1,12 @@
netcdf tmp_xarray {
dimensions:
x = 2 ;
y = 5 ;
variables:
int i(x, y) ;
data:
i =
_, _, _, _, _,
_, _, _, _, _ ;
}

37
nczarr_test/run_purezarr.sh Executable file
View File

@ -0,0 +1,37 @@
#!/bin/sh
# Default srcdir to the current directory when the build system has not set it.
if test "x$srcdir" = x ; then srcdir=`pwd`; fi
# Pull in the shared test environment and the nczarr test helpers
# (fileargs and deletemap are used below).
. ../test_common.sh
. "$srcdir/test_nczarr.sh"
# This shell script tests support for:
# 1. pure zarr read/write
# 2. xarray read/write
# Abort on the first failing command, so a non-empty diff fails the test.
set -e
# testcase <zext>
# Run both round-trip checks against one storage format.
# $1 is the storage format name: file, zip or s3 (see the calls below).
testcase() {
zext=$1
echo "*** Test: pure zarr write; format=$zext"
# fileargs sets $file and $fileurl, using the second argument as the URL fragment.
fileargs tmp_purezarr "zarr&mode=$zext"
# Presumably removes any dataset left over from an earlier run -- confirm in test_nczarr.sh.
deletemap $zext $file
# Create the dataset from the base CDL, dump it back, and compare with the
# reference, which names the dimensions .zdim_2 and .zdim_5.
${NCGEN} -4 -b -o "$fileurl" $srcdir/ref_purezarr_base.cdl
${NCDUMP} $fileurl > tmp_purezarr_${zext}.cdl
diff -b ${srcdir}/ref_purezarr.cdl tmp_purezarr_${zext}.cdl
echo "*** Test: xarray zarr write; format=$zext"
# Same round trip with the xarray fragment; the reference keeps the names x and y.
fileargs tmp_xarray "xarray&mode=$zext"
deletemap $zext $file
${NCGEN} -4 -b -o "$fileurl" $srcdir/ref_purezarr_base.cdl
${NCDUMP} $fileurl > tmp_xarray_${zext}.cdl
diff -b ${srcdir}/ref_xarray.cdl tmp_xarray_${zext}.cdl
}
# The file format is always tested; zip and s3 only when their feature flags are "yes".
testcase file
if test "x$FEATURE_NCZARR_ZIP" = xyes ; then testcase zip; fi
if test "x$FEATURE_S3TESTS" = xyes ; then testcase s3; fi
exit 0

View File

@ -75,10 +75,12 @@ mapstillexists() {
fileargs() {
f="$1"
frag="$2"
if test "x$frag" = x ; then frag="mode=nczarr,$zext" ; fi
case "$zext" in
s3)
S3PATH="${NCZARR_S3_TEST_URL}/netcdf-c"
fileurl="${S3PATH}/${f}#mode=nczarr,$zext"
fileurl="${S3PATH}/${f}#${frag}"
file=$fileurl
S3HOST=`${execdir}/zs3parse -h $S3PATH`
S3BUCKET=`${execdir}/zs3parse -b $S3PATH`
@ -86,7 +88,7 @@ fileargs() {
;;
*)
file="${f}.$zext"
fileurl="file://${f}.$zext#mode=nczarr,$zext"
fileurl="file://${f}.$zext#${frag}"
;;
esac
}

View File

@ -140,7 +140,7 @@ writemeta2(void)
if((stat = nczmap_open(impl,url,NC_WRITE,0,NULL,&map)))
goto done;
if((stat=nczm_concat(META2,NCZVAR,&path)))
if((stat=nczm_concat(META2,NCZARRAY,&path)))
goto done;
if((stat = nczmap_write(map, path, 0, strlen(metadata2), metadata2)))
goto done;
@ -210,7 +210,7 @@ readmeta2(void)
if((stat = nczmap_open(impl,url,0,0,NULL,&map)))
goto done;
if((stat = readkey(map,META2,NCZVAR)))
if((stat = readkey(map,META2,NCZARRAY)))
goto done;
done:
@ -228,7 +228,7 @@ writedata(void)
int i;
size64_t totallen;
char* data1p = (char*)&data1[0]; /* byte level version of data1 */
NCZM_PROPERTIES props;
NCZM_FEATURES features;
/* Create the data */
for(i=0;i<DATA1LEN;i++) data1[i] = i;
@ -241,8 +241,8 @@ writedata(void)
if((stat=nczm_concat(DATA1,"0",&path)))
goto done;
props = nczmap_properties(impl);
if((NCZM_ZEROSTART & props) || (NCZM_WRITEONCE & props)) {
features = nczmap_features(impl);
if((NCZM_ZEROSTART & features) || (NCZM_WRITEONCE & features)) {
if((stat = nczmap_write(map, path, 0, totallen, data1p)))
goto done;
} else {

View File

@ -277,7 +277,7 @@ simpledata(void)
int i;
size64_t totallen, size;
char* data1p = (char*)&data1[0]; /* byte level version of data1 */
NCZM_PROPERTIES props;
NCZM_FEATURES features;
title(__func__);
@ -291,8 +291,8 @@ simpledata(void)
truekey = makekey(DATA1);
props = nczmap_properties(impl);
if((NCZM_ZEROSTART & props) || (NCZM_WRITEONCE & props)) {
features = nczmap_features(impl);
if((NCZM_ZEROSTART & features) || (NCZM_WRITEONCE & features)) {
if((stat = nczmap_write(map, truekey, 0, totallen, data1p)))
goto done;
} else {