mirror of
https://github.com/Unidata/netcdf-c.git
synced 2024-11-27 07:30:33 +08:00
d953899559
re: https://github.com/zarr-developers/zarr-specs/issues/41 After discussions with the Zarr community, it was decided to convert to a new representation of the NCZarr meta-data extensions: version 2. These extensions store the information necessary for mapping the Zarr data model to the netcdf-4 data model. The basic change is to remove the NCZarr-specific objects: .nczarr, .nczgroup, .nczarray, and .nczattr. The contents of these objects are moved into the corresponding existing Zarr objects as special keys. The mapping is as follows: * ''.nczarr'' => ''/.zgroup/_NCZARR_SUPERBLOCK_'' * ''.nczgroup'' => ''.zgroup/_NCZARR_GROUP_'' * ''.nczarray'' => ''.zarray/_NCZARR_ARRAY_'' * ''.nczattr'' => ''.zattr/_NCZARR_ATTR_'' Backward compatibility is maintained by looking for the object ''/.nczarr'' and, if it is found, assuming that the dataset is in the older version 1 format. This compatibility only supports reading such version 1 datasets. Documentation and test cases are also added. Misc. Other Changes: 1. The JSON parsing code was added to the general library instead of nczarr only (ncjson.c, ncjson.h). 2. Improved support for different platform paths by allowing conversion to a single common path representation. 3. Added some new error codes. 4. Modified the nccopy usage message to mention the new chunking specification.
417 lines
12 KiB
C
417 lines
12 KiB
C
/*********************************************************************
|
|
* Copyright 2018, UCAR/Unidata
|
|
* See netcdf/COPYRIGHT file for copying and redistribution conditions.
|
|
*********************************************************************/
|
|
|
|
#include "zincludes.h"
|
|
|
|
/**************************************************/
|
|
/* Forwards */
|
|
|
|
/* Defined near the end of this file; invoked by both ncz_create_dataset()
   and ncz_open_dataset() after zinfo->envv_controls has been filled in. */
static int applycontrols(NCZ_FILE_INFO_T* zinfo);
|
|
|
|
/***************************************************/
|
|
/* API */
|
|
|
|
/**
|
|
@internal Create the topmost dataset object and setup up
|
|
NCZ_FILE_INFO_T state.
|
|
@param zinfo - [in] the internal state
|
|
@return NC_NOERR
|
|
@author Dennis Heimbigner
|
|
*/
|
|
|
|
int
|
|
ncz_create_dataset(NC_FILE_INFO_T* file, NC_GRP_INFO_T* root, const char** controls)
|
|
{
|
|
int stat = NC_NOERR;
|
|
NCZ_FILE_INFO_T* zinfo = NULL;
|
|
NCZ_GRP_INFO_T* zgrp = NULL;
|
|
NCURI* uri = NULL;
|
|
NC* nc = NULL;
|
|
NCjson* json = NULL;
|
|
char* key = NULL;
|
|
|
|
ZTRACE(3,"file=%s root=%s controls=%s",file->hdr.name,root->hdr.name,(controls?nczprint_envv(controls):"null"));
|
|
|
|
nc = (NC*)file->controller;
|
|
|
|
/* Add struct to hold NCZ-specific file metadata. */
|
|
if (!(zinfo = calloc(1, sizeof(NCZ_FILE_INFO_T))))
|
|
{stat = NC_ENOMEM; goto done;}
|
|
file->format_file_info = zinfo;
|
|
zinfo->common.file = file;
|
|
|
|
/* Add struct to hold NCZ-specific group info. */
|
|
if (!(zgrp = calloc(1, sizeof(NCZ_GRP_INFO_T))))
|
|
{stat = NC_ENOMEM; goto done;}
|
|
root->format_grp_info = zgrp;
|
|
zgrp->common.file = file;
|
|
|
|
/* Fill in NCZ_FILE_INFO_T */
|
|
zinfo->created = 1;
|
|
zinfo->common.file = file;
|
|
zinfo->native_endianness = (NCZ_isLittleEndian() ? NC_ENDIAN_LITTLE : NC_ENDIAN_BIG);
|
|
if((zinfo->envv_controls=NCZ_clonestringvec(0,controls)) == NULL)
|
|
{stat = NC_ENOMEM; goto done;}
|
|
|
|
/* fill in some of the zinfo and zroot fields */
|
|
zinfo->zarr.zarr_version = atoi(ZARRVERSION);
|
|
sscanf(NCZARRVERSION,"%lu.%lu.%lu",
|
|
&zinfo->zarr.nczarr_version.major,
|
|
&zinfo->zarr.nczarr_version.minor,
|
|
&zinfo->zarr.nczarr_version.release);
|
|
|
|
/* Apply client controls */
|
|
if((stat = applycontrols(zinfo))) goto done;
|
|
|
|
/* Load auth info from rc file */
|
|
if((stat = ncuriparse(nc->path,&uri))) goto done;
|
|
if(uri) {
|
|
if((stat = NC_authsetup(&zinfo->auth, uri)))
|
|
goto done;
|
|
}
|
|
|
|
/* initialize map handle*/
|
|
if((stat = nczmap_create(zinfo->controls.mapimpl,nc->path,nc->mode,zinfo->controls.flags,NULL,&zinfo->map)))
|
|
goto done;
|
|
|
|
done:
|
|
ncurifree(uri);
|
|
NCJreclaim(json);
|
|
nullfree(key);
|
|
return ZUNTRACE(stat);
|
|
}
|
|
|
|
/**
|
|
@internal Open the topmost dataset object.
|
|
@param file - [in] the file struct
|
|
@param controls - [in] the fragment list in envv form from uri
|
|
@return NC_NOERR
|
|
@author Dennis Heimbigner
|
|
*/
|
|
|
|
int
|
|
ncz_open_dataset(NC_FILE_INFO_T* file, const char** controls)
|
|
{
|
|
int stat = NC_NOERR;
|
|
NC* nc = NULL;
|
|
NC_GRP_INFO_T* root = NULL;
|
|
NCURI* uri = NULL;
|
|
void* content = NULL;
|
|
NCjson* json = NULL;
|
|
NCZ_FILE_INFO_T* zinfo = NULL;
|
|
int mode;
|
|
NClist* modeargs = NULL;
|
|
char* nczarr_version = NULL;
|
|
char* zarr_format = NULL;
|
|
|
|
ZTRACE(3,"file=%s controls=%s",file->hdr.name,(controls?nczprint_envv(controls):"null"));
|
|
|
|
/* Extract info reachable via file */
|
|
nc = (NC*)file->controller;
|
|
mode = nc->mode;
|
|
|
|
root = file->root_grp;
|
|
assert(root != NULL && root->hdr.sort == NCGRP);
|
|
|
|
/* Add struct to hold NCZ-specific file metadata. */
|
|
if (!(file->format_file_info = calloc(1, sizeof(NCZ_FILE_INFO_T))))
|
|
{stat = NC_ENOMEM; goto done;}
|
|
zinfo = file->format_file_info;
|
|
|
|
/* Fill in NCZ_FILE_INFO_T */
|
|
zinfo->created = 0;
|
|
zinfo->common.file = file;
|
|
zinfo->native_endianness = (NCZ_isLittleEndian() ? NC_ENDIAN_LITTLE : NC_ENDIAN_BIG);
|
|
if((zinfo->envv_controls = NCZ_clonestringvec(0,controls))==NULL) /*0=>envv style*/
|
|
{stat = NC_ENOMEM; goto done;}
|
|
|
|
/* Add struct to hold NCZ-specific group info. */
|
|
if (!(root->format_grp_info = calloc(1, sizeof(NCZ_GRP_INFO_T))))
|
|
{stat = NC_ENOMEM; goto done;}
|
|
((NCZ_GRP_INFO_T*)root->format_grp_info)->common.file = file;
|
|
|
|
/* Apply client controls */
|
|
if((stat = applycontrols(zinfo))) goto done;
|
|
|
|
/* initialize map handle*/
|
|
if((stat = nczmap_open(zinfo->controls.mapimpl,nc->path,mode,zinfo->controls.flags,NULL,&zinfo->map)))
|
|
goto done;
|
|
|
|
if((stat = ncz_read_superblock(file,&nczarr_version,&zarr_format))) goto done;
|
|
|
|
if(nczarr_version == NULL) /* default */
|
|
nczarr_version = strdup(NCZARRVERSION);
|
|
if(zarr_format == NULL) /* default */
|
|
zarr_format = strdup(ZARRVERSION);
|
|
/* Extract the information from it */
|
|
if(sscanf(zarr_format,"%d",&zinfo->zarr.zarr_version)!=1)
|
|
{stat = NC_ENCZARR; goto done;}
|
|
if(sscanf(nczarr_version,"%lu.%lu.%lu",
|
|
&zinfo->zarr.nczarr_version.major,
|
|
&zinfo->zarr.nczarr_version.minor,
|
|
&zinfo->zarr.nczarr_version.release) == 0)
|
|
{stat = NC_ENCZARR; goto done;}
|
|
|
|
/* Load auth info from rc file */
|
|
if((stat = ncuriparse(nc->path,&uri))) goto done;
|
|
if(uri) {
|
|
if((stat = NC_authsetup(&zinfo->auth, uri)))
|
|
goto done;
|
|
}
|
|
|
|
done:
|
|
nullfree(zarr_format);
|
|
nullfree(nczarr_version);
|
|
ncurifree(uri);
|
|
nclistfreeall(modeargs);
|
|
if(json) NCJreclaim(json);
|
|
nullfree(content);
|
|
return ZUNTRACE(stat);
|
|
}
|
|
|
|
/**
 * @internal Report whether a file uses the netCDF-4 data model.
 *
 * Every dataset handled by libzarr is treated as netCDF-4, so the
 * answer does not depend on the file.
 *
 * @param h5 Pointer to the file info struct (ignored).
 *
 * @returns 1 (true) unconditionally.
 * @author Dennis Heimbigner.
 */
int
NCZ_isnetcdf4(struct NC_FILE_INFO* h5)
{
    NC_UNUSED(h5);
    return 1;
}
|
|
|
|
/**
|
|
* @internal Determine version info
|
|
*
|
|
* For libzarr, this is not well defined
|
|
*
|
|
* @param majorp Pointer to major version number
|
|
* @param minorp Pointer to minor version number
|
|
* @param releasep Pointer to release version number
|
|
*
|
|
* @returns NC_NOERR No error.
|
|
* @author Dennis Heimbigner.
|
|
*/
|
|
int
|
|
NCZ_get_libversion(unsigned long* majorp, unsigned long* minorp,unsigned long* releasep)
|
|
{
|
|
unsigned long m0,m1,m2;
|
|
sscanf(NCZARRVERSION,"%lu.%lu.%lu",&m0,&m1,&m2);
|
|
if(majorp) *majorp = m0;
|
|
if(minorp) *minorp = m1;
|
|
if(releasep) *releasep = m2;
|
|
return NC_NOERR;
|
|
}
|
|
|
|
/**
|
|
* @internal Determine "superblock" number.
|
|
*
|
|
* For libzarr, use the value of the major part of the nczarr version.
|
|
*
|
|
* @param superblocp Pointer to place to return superblock.
|
|
* use the nczarr format version major as the superblock number.
|
|
*
|
|
* @returns NC_NOERR No error.
|
|
* @author Dennis Heimbigner.
|
|
*/
|
|
int
|
|
NCZ_get_superblock(NC_FILE_INFO_T* file, int* superblockp)
|
|
{
|
|
NCZ_FILE_INFO_T* zinfo = file->format_file_info;
|
|
if(superblockp) *superblockp = zinfo->zarr.nczarr_version.major;
|
|
return NC_NOERR;
|
|
}
|
|
|
|
/**************************************************/
|
|
/* Utilities */
|
|
|
|
#if 0
|
|
/**
|
|
@internal Open the root group object
|
|
@param dataset - [in] the root dataset object
|
|
@param rootp - [out] created root group
|
|
@return NC_NOERR
|
|
@author Dennis Heimbigner
|
|
*/
|
|
static int
|
|
ncz_open_rootgroup(NC_FILE_INFO_T* dataset)
|
|
{
|
|
int stat = NC_NOERR;
|
|
int i;
|
|
NCZ_FILE_INFO_T* zfile = NULL;
|
|
NC_GRP_INFO_T* root = NULL;
|
|
void* content = NULL;
|
|
char* rootpath = NULL;
|
|
NCjson* json = NULL;
|
|
|
|
ZTRACE(3,"dataset=",dataset->hdr.name);
|
|
|
|
zfile = dataset->format_file_info;
|
|
|
|
/* Root should already be defined */
|
|
root = dataset->root_grp;
|
|
|
|
assert(root != NULL);
|
|
|
|
if((stat=nczm_concat(NULL,ZGROUP,&rootpath)))
|
|
goto done;
|
|
if((stat = NCZ_downloadjson(zfile->map, rootpath, &json)))
|
|
goto done;
|
|
/* Process the json */
|
|
for(i=0;i<nclistlength(json->contents);i+=2) {
|
|
const NCjson* key = nclistget(json->contents,i);
|
|
const NCjson* value = nclistget(json->contents,i+1);
|
|
if(strcmp(NCJstring(key),"zarr_format")==0) {
|
|
int zversion;
|
|
if(sscanf(NCJstring(value),"%d",&zversion)!=1)
|
|
{stat = NC_ENOTNC; goto done;}
|
|
/* Verify against the dataset */
|
|
if(zversion != zfile->zarr.zarr_version)
|
|
{stat = NC_ENOTNC; goto done;}
|
|
}
|
|
}
|
|
|
|
done:
|
|
if(json) NCJreclaim(json);
|
|
nullfree(rootpath);
|
|
nullfree(content);
|
|
return ZUNTRACE(stat);
|
|
}
|
|
#endif
|
|
|
|
#if 0
|
|
/**
|
|
@internal Rewrite attributes into a group or var
|
|
@param map - [in] the map object for storage
|
|
@param container - [in] the containing object
|
|
@param jattrs - [in] the json for .zattrs
|
|
@param jtypes - [in] the json for .ztypes
|
|
@return NC_NOERR
|
|
@author Dennis Heimbigner
|
|
*/
|
|
int
|
|
ncz_unload_jatts(NCZ_FILE_INFO_T* zinfo, NC_OBJ* container, NCjson* jattrs, NCjson* jtypes)
|
|
{
|
|
int stat = NC_NOERR;
|
|
char* fullpath = NULL;
|
|
char* akey = NULL;
|
|
char* tkey = NULL;
|
|
NCZMAP* map = zinfo->map;
|
|
|
|
assert((jattrs->sort = NCJ_DICT));
|
|
assert((jtypes->sort = NCJ_DICT));
|
|
|
|
if(container->sort == NCGRP) {
|
|
NC_GRP_INFO_T* grp = (NC_GRP_INFO_T*)container;
|
|
/* Get grp's fullpath name */
|
|
if((stat = NCZ_grpkey(grp,&fullpath)))
|
|
goto done;
|
|
} else {
|
|
NC_VAR_INFO_T* var = (NC_VAR_INFO_T*)container;
|
|
/* Get var's fullpath name */
|
|
if((stat = NCZ_varkey(var,&fullpath)))
|
|
goto done;
|
|
}
|
|
|
|
/* Construct the path to the .zattrs object */
|
|
if((stat = nczm_concat(fullpath,ZATTRS,&akey)))
|
|
goto done;
|
|
|
|
/* Always write as V2 */
|
|
|
|
{
|
|
NCjson* k = NULL;
|
|
NCjson* v = NULL;
|
|
/* remove any previous version */
|
|
if(NCJremove(jattrs,NCZ_V2_ATTRS,&k,&v) == NC_NOERR) {
|
|
NCJreclaim(k); NCJreclaim(v);
|
|
}
|
|
}
|
|
|
|
if(!(zinfo->controls.flags & FLAG_PUREZARR)) {
|
|
/* Insert the jtypes into the set of attributes */
|
|
if((stat = NCJinsert(jattrs,NCZ_V2_ATTRS,jtypes))) goto done;
|
|
}
|
|
|
|
/* Upload the .zattrs object */
|
|
if((stat=NCZ_uploadjson(map,tkey,jattrs)))
|
|
goto done;
|
|
|
|
done:
|
|
if(stat) {
|
|
NCJreclaim(jattrs);
|
|
NCJreclaim(jtypes);
|
|
}
|
|
nullfree(fullpath);
|
|
nullfree(akey);
|
|
nullfree(tkey);
|
|
return stat;
|
|
}
|
|
#endif
|
|
|
|
/**
@internal Look up a control value in an envv-style vector.
The vector is a NULL-terminated sequence of alternating key,value
strings; keys are compared case-insensitively.
@param envv_controls - [in] the key/value vector; NULL is treated as empty
@param key - [in] the control name to search for
@return the value paired with the first matching key, or NULL if the key
        is absent (or the vector is NULL)
*/
static const char*
controllookup(const char** envv_controls, const char* key)
{
    const char** p;

    if(envv_controls == NULL)
        return NULL;
    /* Require a complete key,value pair on every step: a vector with an odd
       number of entries would otherwise make p+=2 jump over the NULL
       terminator and read past the end of the vector. */
    for(p=envv_controls;p[0] != NULL && p[1] != NULL;p+=2) {
        if(strcasecmp(key,p[0])==0) {
            return p[1];
        }
    }
    return NULL;
}
|
|
|
|
|
|
static int
|
|
applycontrols(NCZ_FILE_INFO_T* zinfo)
|
|
{
|
|
int i,stat = NC_NOERR;
|
|
const char* value = NULL;
|
|
NClist* modelist = nclistnew();
|
|
int noflags = 0; /* track non-default negative flags */
|
|
|
|
if((value = controllookup((const char**)zinfo->envv_controls,"mode")) != NULL) {
|
|
if((stat = NCZ_comma_parse(value,modelist))) goto done;
|
|
}
|
|
/* Process the modelist first */
|
|
zinfo->controls.mapimpl = NCZM_DEFAULT;
|
|
for(i=0;i<nclistlength(modelist);i++) {
|
|
const char* p = nclistget(modelist,i);
|
|
if(strcasecmp(p,PUREZARRCONTROL)==0) zinfo->controls.flags |= (FLAG_PUREZARR|FLAG_XARRAYDIMS);
|
|
else if(strcasecmp(p,XARRAYCONTROL)==0) zinfo->controls.flags |= (FLAG_XARRAYDIMS|FLAG_PUREZARR); /*xarray=>zarr*/
|
|
else if(strcasecmp(p,NOXARRAYCONTROL)==0) {
|
|
noflags |= FLAG_XARRAYDIMS;
|
|
zinfo->controls.flags |= FLAG_PUREZARR; /*noxarray=>zarr*/
|
|
}
|
|
else if(strcasecmp(p,"zip")==0) zinfo->controls.mapimpl = NCZM_ZIP;
|
|
else if(strcasecmp(p,"file")==0) zinfo->controls.mapimpl = NCZM_FILE;
|
|
else if(strcasecmp(p,"s3")==0) zinfo->controls.mapimpl = NCZM_S3;
|
|
}
|
|
/* Apply negative controls by turning off negative flags */
|
|
/* This is necessary to avoid order dependence of mode flags when both positive and negative flags are defined */
|
|
zinfo->controls.flags &= (~noflags);
|
|
|
|
/* Process other controls */
|
|
if((value = controllookup((const char**)zinfo->envv_controls,"log")) != NULL) {
|
|
zinfo->controls.flags |= FLAG_LOGGING;
|
|
ncsetlogging(1);
|
|
}
|
|
if((value = controllookup((const char**)zinfo->envv_controls,"show")) != NULL) {
|
|
if(strcasecmp(value,"fetch")==0)
|
|
zinfo->controls.flags |= FLAG_SHOWFETCH;
|
|
}
|
|
done:
|
|
nclistfreeall(modelist);
|
|
return stat;
|
|
}
|
|
|