mirror of
https://github.com/Unidata/netcdf-c.git
synced 2025-04-18 18:20:39 +08:00
Turn on the xarray convention for NCZarr format
re: https://github.com/pydata/xarray/issues/6374 As a result of a discussion about Xarray (see above issue), I decided to turn on the xarray convention for NCZarr datasets where possible so that xarray can read a larger set of nczarr generated datasets. This causes the following changes: * If the user wants to generate a pure zarr file, then the mode "zarr" must be explicitly used; it is no longer the case that "mode=xarray" or "mode=noxarray" implies "mode=zarr". * It is still the case that "mode=noxarray" will turn off the XArray convention. The following conditions will cause ''_ARRAY_DIMENSIONS'' to not be written: * The variable is not in the root group. * Any dimension referenced by the variable is not in the root group.
This commit is contained in:
parent
cd0f1690e8
commit
a5cae51efc
2
.github/workflows/run_tests_ubuntu.yml
vendored
2
.github/workflows/run_tests_ubuntu.yml
vendored
@ -4,7 +4,7 @@
|
||||
|
||||
name: Run Ubuntu/Linux netCDF Tests
|
||||
|
||||
on: [ pull_request ]
|
||||
on: [pull_request]
|
||||
|
||||
jobs:
|
||||
|
||||
|
@ -7,6 +7,7 @@ This file contains a high-level description of this package's evolution. Release
|
||||
|
||||
## 4.8.2 - TBD
|
||||
|
||||
* [Enhancement] Turn on the XArray convention for NCZarr files by default. See [Github #????](https://github.com/Unidata/netcdf-c/pull/????).
|
||||
* [Enhancement] Improve filter support. More specifically (1) add nc_inq_filter_avail to check if a filter is available, (2) add the notion of standard filters, (3) cleanup szip support to fix interaction with NCZarr. See [Github #2245](https://github.com/Unidata/netcdf-c/pull/2245).
|
||||
* [Bug Fix] Require that the type of the variable in nc_def_var_filter is not variable length. See [Github #2231](https://github.com/Unidata/netcdf-c/pull/2231).
|
||||
* [File Change] Apply HDF5 v1.8 format compatibility when writing to previous files, as well as when creating new files. The superblock version remains at 2 for newly created files. Full backward read/write compatibility for netCDF-4 is maintained in all cases. See [Github #2176](https://github.com/Unidata/netcdf-c/issues/2176).
|
||||
|
@ -136,16 +136,18 @@ Note that it should be the case that zipping a _file_
|
||||
format directory tree will produce a file readable by the
|
||||
_zip_ storage format, and vice-versa.
|
||||
|
||||
By default, _mode=zarr_ also supports the XArray _\_ARRAY\_DIMENSIONS_ convention. The _noxarray_ mode tells the library to disable the XArray support.
|
||||
By default, the XArray convention is supported and used for
|
||||
both NCZarr files and pure Zarr files. This
|
||||
means that every variable in the root group whose named dimensions
|
||||
are also in the root group will have an attribute called
|
||||
*\_ARRAY\_DIMENSIONS* that stores those dimension names.
|
||||
The _noxarray_ mode tells the library to disable the XArray support.
|
||||
|
||||
The netcdf-c library is capable of inferring additional mode flags based on the flags it finds. Currently we have the following inferences.
|
||||
|
||||
- _xarray_ => _zarr_
|
||||
- _noxarray_ => _zarr_
|
||||
- _zarr_ => _nczarr_
|
||||
|
||||
So for example: ````...#mode=noxarray,zip```` is equivalent to this.
|
||||
````...#mode=nczarr,zarr,noxarray,zip
|
||||
So for example: ````...#mode=zarr,zip```` is equivalent to this.
|
||||
````...#mode=nczarr,zarr,zip
|
||||
````
|
||||
<!--
|
||||
- log=<output-stream>: this control turns on logging output,
|
||||
@ -434,10 +436,13 @@ The value of this attribute is a list of dimension names (strings).
|
||||
An example might be ````["time", "lon", "lat"]````.
|
||||
It is essentially equivalent to the ````_NCZARR_ARRAY "dimrefs" list````, except that the latter uses fully qualified names so the referenced dimensions can be anywhere in the dataset.
|
||||
|
||||
As of _netcdf-c_ version 4.8.1, The Xarray ''_ARRAY_DIMENSIONS'' attribute is supported.
|
||||
This attribute will be read/written by default, but can be suppressed if the mode value "noxarray" is specified.
|
||||
As of _netcdf-c_ version 4.8.2, the XArray ''_ARRAY_DIMENSIONS'' attribute is supported for both NCZarr and pure Zarr.
|
||||
If possible, this attribute will be read/written by default,
|
||||
but can be suppressed if the mode value "noxarray" is specified.
|
||||
If detected, then these dimension names are used to define shared dimensions.
|
||||
Note that "noxarray" or "xarray" implies pure zarr format.
|
||||
The following conditions will cause ''_ARRAY_DIMENSIONS'' to not be written.
|
||||
* The variable is not in the root group,
|
||||
* Any dimension referenced by the variable is not in the root group.
|
||||
|
||||
# Examples {#nczarr_examples}
|
||||
|
||||
|
@ -137,9 +137,9 @@ static const struct MACRODEF {
|
||||
{"dap4","mode",{"dap4",NULL}},
|
||||
{"s3","mode",{"s3","nczarr",NULL}},
|
||||
{"bytes","mode",{"bytes",NULL}},
|
||||
{"xarray","mode",{"nczarr","zarr","xarray",NULL}},
|
||||
{"noxarray","mode",{"nczarr","zarr","noxarray",NULL}},
|
||||
{"zarr","mode",{"nczarr","zarr","xarray",NULL}},
|
||||
{"xarray","mode",{"nczarr", NULL}},
|
||||
{"noxarray","mode",{"nczarr", "noxarray", NULL}},
|
||||
{"zarr","mode",{"nczarr","zarr", NULL}},
|
||||
{NULL,NULL,{NULL}}
|
||||
};
|
||||
|
||||
@ -149,9 +149,8 @@ static const struct MODEINFER {
|
||||
char* inference;
|
||||
} modeinferences[] = {
|
||||
{"zarr","nczarr"},
|
||||
{"zarr","xarray"},
|
||||
{"xarray","zarr"},
|
||||
{"noxarray","zarr"},
|
||||
{"xarray","nczarr"},
|
||||
{"noxarray","nczarr"},
|
||||
{NULL,NULL}
|
||||
};
|
||||
|
||||
|
@ -384,13 +384,12 @@ applycontrols(NCZ_FILE_INFO_T* zinfo)
|
||||
}
|
||||
/* Process the modelist first */
|
||||
zinfo->controls.mapimpl = NCZM_DEFAULT;
|
||||
zinfo->controls.flags |= FLAG_XARRAYDIMS; /* Always support XArray convention where possible */
|
||||
for(i=0;i<nclistlength(modelist);i++) {
|
||||
const char* p = nclistget(modelist,i);
|
||||
if(strcasecmp(p,PUREZARRCONTROL)==0) zinfo->controls.flags |= (FLAG_PUREZARR|FLAG_XARRAYDIMS);
|
||||
else if(strcasecmp(p,XARRAYCONTROL)==0) zinfo->controls.flags |= (FLAG_XARRAYDIMS|FLAG_PUREZARR); /*xarray=>zarr*/
|
||||
if(strcasecmp(p,PUREZARRCONTROL)==0) zinfo->controls.flags |= (FLAG_PUREZARR);
|
||||
else if(strcasecmp(p,NOXARRAYCONTROL)==0) {
|
||||
noflags |= FLAG_XARRAYDIMS;
|
||||
zinfo->controls.flags |= FLAG_PUREZARR; /*noxarray=>zarr*/
|
||||
}
|
||||
else if(strcasecmp(p,"zip")==0) zinfo->controls.mapimpl = NCZM_ZIP;
|
||||
else if(strcasecmp(p,"file")==0) zinfo->controls.mapimpl = NCZM_FILE;
|
||||
|
@ -695,20 +695,33 @@ ncz_sync_atts(NC_FILE_INFO_T* file, NC_OBJ* container, NCindex* attlist, int isc
|
||||
|
||||
if(container->sort == NCVAR) {
|
||||
if(isrootgroup && isxarray) {
|
||||
int dimsinroot = 1;
|
||||
/* Insert the XArray _ARRAY_DIMENSIONS attribute */
|
||||
if((stat = NCJnew(NCJ_ARRAY,&jdimrefs)))
|
||||
goto done;
|
||||
/* Walk the dimensions and capture the names */
|
||||
/* Walk the dimensions to check in root group */
|
||||
for(i=0;i<var->ndims;i++) {
|
||||
NC_DIM_INFO_T* dim = var->dim[i];
|
||||
char* dimname = strdup(dim->hdr.name);
|
||||
if(dimname == NULL) {stat = NC_ENOMEM; goto done;}
|
||||
NCJaddstring(jdimrefs,NCJ_STRING,dimname);
|
||||
nullfree(dimname); dimname = NULL;
|
||||
/* Verify that the dimension is in the root group */
|
||||
if(dim->container && dim->container->parent != NULL) {
|
||||
dimsinroot = 0; /* dimension is not in root */
|
||||
break;
|
||||
}
|
||||
}
|
||||
if(dimsinroot) {
|
||||
/* Walk the dimensions and capture the names */
|
||||
for(i=0;i<var->ndims;i++) {
|
||||
char* dimname;
|
||||
NC_DIM_INFO_T* dim = var->dim[i];
|
||||
dimname = strdup(dim->hdr.name);
|
||||
if(dimname == NULL) {stat = NC_ENOMEM; goto done;}
|
||||
NCJaddstring(jdimrefs,NCJ_STRING,dimname);
|
||||
nullfree(dimname); dimname = NULL;
|
||||
}
|
||||
/* Add the _ARRAY_DIMENSIONS attribute */
|
||||
if((stat = NCJinsert(jatts,NC_XARRAY_DIMS,jdimrefs))) goto done;
|
||||
jdimrefs = NULL;
|
||||
}
|
||||
/* Add the _ARRAY_DIMENSIONS attribute */
|
||||
if((stat = NCJinsert(jatts,NC_XARRAY_DIMS,jdimrefs))) goto done;
|
||||
jdimrefs = NULL;
|
||||
}
|
||||
}
|
||||
/* Add Quantize Attribute */
|
||||
@ -736,9 +749,11 @@ ncz_sync_atts(NC_FILE_INFO_T* file, NC_OBJ* container, NCindex* attlist, int isc
|
||||
/* Insert the _NCZARR_ATTR attribute */
|
||||
if((stat = NCJnew(NCJ_DICT,&jdict)))
|
||||
goto done;
|
||||
if((stat = NCJinsert(jdict,"types",jtypes))) goto done;
|
||||
if(jtypes != NULL)
|
||||
{if((stat = NCJinsert(jdict,"types",jtypes))) goto done;}
|
||||
jtypes = NULL;
|
||||
if((stat = NCJinsert(jatts,NCZ_V2_ATTR,jdict))) goto done;
|
||||
if(jdict != NULL)
|
||||
{if((stat = NCJinsert(jatts,NCZ_V2_ATTR,jdict))) goto done;}
|
||||
jdict = NULL;
|
||||
}
|
||||
/* write .zattrs path */
|
||||
@ -1449,7 +1464,7 @@ define_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames)
|
||||
|
||||
if(zinfo->controls.flags & FLAG_PUREZARR) purezarr = 1;
|
||||
if(zinfo->controls.flags & FLAG_NCZARR_V1) formatv1 = 1;
|
||||
if(zinfo->controls.flags & FLAG_XARRAYDIMS) {purezarr = 1; xarray = 1;}
|
||||
if(zinfo->controls.flags & FLAG_XARRAYDIMS) {xarray = 1;}
|
||||
|
||||
/* Load each var in turn */
|
||||
for(i = 0; i < nclistlength(varnames); i++) {
|
||||
@ -2246,10 +2261,8 @@ computedimrefs(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, int purezarr, int xarra
|
||||
|
||||
assert(zfile && zvar);
|
||||
|
||||
/* xarray => purezarr */
|
||||
assert(!xarray || purezarr);
|
||||
|
||||
if(xarray) {/* Read in the attributes to get xarray dimdef attribute; Note that it might not exist */
|
||||
if(purezarr && xarray) {/* Read in the attributes to get xarray dimdef attribute; Note that it might not exist */
|
||||
/* Note that if xarray && !purezarr, then xarray will be superseded by the nczarr dimensions key */
|
||||
char zdimname[4096];
|
||||
if(zvar->xarray == NULL) {
|
||||
assert(nclistlength(dimnames) == 0);
|
||||
|
@ -27,8 +27,9 @@ main() {
|
||||
if(fwrite(TRUNCATED_FILE_CONTENT, sizeof(char), sizeof(TRUNCATED_FILE_CONTENT), fp) != sizeof(TRUNCATED_FILE_CONTENT)) ERR;
|
||||
fclose(fp);
|
||||
|
||||
int ncid;
|
||||
if (nc_open(FILE_NAME, 0, &ncid) != NC_EHDFERR) ERR;
|
||||
int ncid, stat;
|
||||
stat = nc_open(FILE_NAME, 0, &ncid);
|
||||
if (stat != NC_EHDFERR && stat != NC_ENOFILTER && stat != NC_ENOTNC) ERR;
|
||||
}
|
||||
|
||||
{
|
||||
|
@ -1,9 +1,10 @@
|
||||
netcdf ref_oldformat {
|
||||
dimensions:
|
||||
lat = 8 ;
|
||||
_zdim_8 = 8 ;
|
||||
_zdim_10 = 10 ;
|
||||
variables:
|
||||
int lat(_zdim_8) ;
|
||||
int lat(lat) ;
|
||||
lat:_FillValue = -1 ;
|
||||
lat:lat_attr = "latitude" ;
|
||||
data:
|
||||
|
@ -15,7 +15,7 @@ testcase() {
|
||||
zext=$1
|
||||
|
||||
echo "*** Test: pure zarr write then read; format=$zext"
|
||||
fileargs tmp_purezarr "mode=noxarray,$zext"
|
||||
fileargs tmp_purezarr "mode=zarr,noxarray,$zext"
|
||||
deletemap $zext $file
|
||||
${NCGEN} -4 -b -o "$fileurl" $srcdir/ref_purezarr_base.cdl
|
||||
${NCDUMP} $fileurl > tmp_purezarr_${zext}.cdl
|
||||
@ -27,9 +27,9 @@ fileargs tmp_xarray "mode=zarr,$zext"
|
||||
${NCGEN} -4 -b -o "$fileurl" $srcdir/ref_purezarr_base.cdl
|
||||
${NCDUMP} $fileurl > tmp_xarray_${zext}.cdl
|
||||
diff -b ${srcdir}/ref_xarray.cdl tmp_xarray_${zext}.cdl
|
||||
echo "*** Test: pure zarr reading nczarr; format=$zext"
|
||||
fileargs tmp_nczarr "mode=nczarr,$zext"
|
||||
|
||||
echo "*** Test: pure zarr reading nczarr; format=$zext"
|
||||
fileargs tmp_nczarr "mode=nczarr,noxarray,$zext"
|
||||
deletemap $zext $file
|
||||
${NCGEN} -4 -b -o "$fileurl" $srcdir/ref_whole.cdl
|
||||
fileargs tmp_nczarr "mode=zarr,$zext"
|
||||
|
Loading…
x
Reference in New Issue
Block a user