netcdf-c/libnczarr/zclose.c
Dennis Heimbigner 231ae96c4b Add support for Zarr string type to NCZarr
* re: https://github.com/Unidata/netcdf-c/pull/2278
* re: https://github.com/Unidata/netcdf-c/issues/2485
* re: https://github.com/Unidata/netcdf-c/issues/2474

This PR subsumes PR https://github.com/Unidata/netcdf-c/pull/2278.
It is actually a bit of an omnibus, covering several issues.

## PR https://github.com/Unidata/netcdf-c/pull/2278
Add support for the Zarr string type.
Zarr strings are restricted currently to be of fixed size.
The primary issue to be addressed is to provide a way for the user to
specify the size of the fixed-length strings. This is handled by providing
the following new special attributes:
1. **_nczarr_default_maxstrlen** —
This is an attribute of the root group. It specifies the default
maximum string length for string types. If not specified, then
it has the value of 64 characters.
2. **_nczarr_maxstrlen** —
This is a per-variable attribute. It specifies the maximum
string length for the string type associated with the variable.
If not specified, then it is assigned the value of
**_nczarr_default_maxstrlen**.

This PR also requires some hacking to handle the existing netcdf-c NC_CHAR
type, which does not exist in zarr. The goal was to choose numpy types for
both the netcdf-c NC_STRING type and the netcdf-c NC_CHAR type such that
if a pure zarr implementation read them, it would still work and an
NC_CHAR type would be handled by zarr as a string of length 1.

For writing variables and NCZarr attributes, the type mapping is as follows:
* "|S1" for NC_CHAR.
* ">S1" for NC_STRING && MAXSTRLEN==1
* ">Sn" for NC_STRING && MAXSTRLEN==n

Note that it is a bit of a hack to use endianness, but it should be ok since for
string/char, the endianness has no meaning.

For reading attributes with pure zarr (i.e. with no nczarr
attribute types defined), they will always be interpreted as of
type NC_CHAR.

## Issue: https://github.com/Unidata/netcdf-c/issues/2474
This PR partly fixes this issue because it provides more
comprehensive support for Zarr attributes that are JSON valued expressions.
This PR still does not address the problem in that issue where the
_ARRAY_DIMENSION attribute is incorrectly set. That can only be
fixed by the creator of the datasets.

## Issue: https://github.com/Unidata/netcdf-c/issues/2485
This PR also fixes the scalar failure shown in this issue.
It generally cleans up scalar handling.
It also adds a note to the documentation describing that
NCZarr supports scalars while Zarr does not and also how
scalar interoperability is achieved.

## Misc. Other Changes
1. Convert the nczarr special attributes and keys to be all lower case. So "_NCZARR_ATTR" is now "_nczarr_attr". Support backward compatibility for the upper case names.
2. Cleanup my too-clever-by-half handling of scalars in libnczarr.
2022-08-27 20:21:13 -06:00

295 lines
7.5 KiB
C

/*********************************************************************
* Copyright 1993, UCAR/Unidata
* See netcdf/COPYRIGHT file for copying and redistribution conditions.
*********************************************************************/
#include "zincludes.h"
#include "zfilter.h"
/* Forward declarations of the file-local helpers defined later in this file */
static int zclose_group(NC_GRP_INFO_T*);        /* recursive per-group close */
static int zclose_gatts(NC_GRP_INFO_T*);        /* group attribute annotations */
static int zclose_vars(NC_GRP_INFO_T*);         /* variable annotations */
static int zclose_dims(NC_GRP_INFO_T*);         /* dimension annotations */
static int zclose_types(NC_GRP_INFO_T*);        /* annotations of types listed in the group */
static int zclose_type(NC_TYPE_INFO_T* type);   /* annotation of a single type */
static int zwrite_vars(NC_GRP_INFO_T *grp);     /* recursive write of all vars */
/**************************************************/
/**
 * @internal Close an open ZARR file: optionally flush the variable data,
 * then recurse through the file and release resources. All ZARR
 * annotations are reclaimed.
 *
 * The steps, in order, are:
 *  1. Unless this is an abort, flush | create all chunks for all vars.
 *  2. Reclaim the zarr annotations of the root group and everything in it.
 *  3. Close the underlying map. The second argument passed to
 *     nczmap_close() is 1 only when this is an abort and the file's
 *     `created` flag is set (presumably a request to delete the newly
 *     created dataset -- confirm against nczmap_close()).
 *  4. Free the file-level annotation together with the controls vector
 *     and the auth data hanging off it.
 *
 * A failure in step 1 or 2 returns immediately and leaves the file-level
 * annotation in place. A failure in step 3 is still reported to the
 * caller, but step 4 is carried out regardless so that the file-level
 * annotation is not leaked.
 *
 * @param file Pointer to ZARR file info struct.
 * @param abort True if this is an abort.
 *
 * @return ::NC_NOERR No error.
 * @return ::NC_ENCZARR could not close the file.
 * @return Otherwise the error code of the first step that failed.
 * @author Dennis Heimbigner
 */
int
ncz_close_file(NC_FILE_INFO_T* file, int abort)
{
    int stat = NC_NOERR;
    NCZ_FILE_INFO_T* zinfo = NULL;

    ZTRACE(2,"file=%s abort=%d",file->hdr.name,abort);

    if(!abort) {
        /* Flush | create all chunks for all vars */
        if((stat=zwrite_vars(file->root_grp))) goto done;
    }

    /* Internal close to reclaim zarr annotations */
    if((stat = zclose_group(file->root_grp)))
        goto done;

    zinfo = file->format_file_info;

    /* Remember the outcome of closing the map, but do not jump out on
     * failure: the annotation below must be released either way. */
    stat = nczmap_close(zinfo->map,(abort && zinfo->created)?1:0);

    NCZ_freestringvec(0,zinfo->envv_controls);
    NC_authfree(zinfo->auth);
    nullfree(zinfo);
    file->format_file_info = NULL; /* avoid memory errors */

done:
    return ZUNTRACE(stat);
}
/**************************************************/
/**
 * @internal Release the zarr annotations of a group and, recursively, of
 * everything it contains.
 *
 * Child groups are handled first; then this group's attributes, vars,
 * dims and types, in that order; finally the group's own annotation is
 * freed and its pointer cleared. The first failing step ends the
 * function, in which case the group's own annotation is not freed.
 *
 * @param grp Pointer to group info struct; it must carry a non-NULL
 * format_grp_info.
 *
 * @return ::NC_NOERR No error.
 * @return Otherwise the error code of the first step that failed.
 * @author Dennis Heimbigner
 */
static int
zclose_group(NC_GRP_INFO_T *grp)
{
    int status = NC_NOERR;
    int child;

    assert(grp && grp->format_grp_info != NULL);
    LOG((3, "%s: grp->name %s", __func__, grp->hdr.name));

    /* Descend into every subgroup before touching this group. */
    for(child = 0; child < ncindexsize(grp->children); child++) {
        NC_GRP_INFO_T* subgrp = (NC_GRP_INFO_T*)ncindexith(grp->children,child);
        status = zclose_group(subgrp);
        if(status) return status;
    }

    /* Group-level attributes. */
    status = zclose_gatts(grp);
    if(status) return status;

    /* Variables. */
    status = zclose_vars(grp);
    if(status) return status;

    /* Dimensions. */
    status = zclose_dims(grp);
    if(status) return status;

    /* Types. */
    status = zclose_types(grp);
    if(status) return status;

    /* Last of all, the annotation of the group itself. */
    LOG((4, "%s: closing group %s", __func__, grp->hdr.name));
    nullfree(grp->format_grp_info);
    grp->format_grp_info = NULL; /* avoid memory errors */

    return status;
}
/**
 * @internal Release the zarr annotation attached to each attribute
 * listed directly in a group (grp->att).
 *
 * Every attribute is expected to carry a non-NULL format_att_info; it is
 * freed and the pointer is cleared.
 *
 * @param grp Pointer to group info struct.
 *
 * @return ::NC_NOERR No error (this function has no failure path).
 * @author Dennis Heimbigner
 */
static int
zclose_gatts(NC_GRP_INFO_T* grp)
{
    int status = NC_NOERR;
    int idx;

    for(idx = 0; idx < ncindexsize(grp->att); idx++) {
        NC_ATT_INFO_T* gatt = (NC_ATT_INFO_T*)ncindexith(grp->att, idx);

        assert(gatt && gatt->format_att_info != NULL);
        nullfree(gatt->format_att_info);
        gatt->format_att_info = NULL; /* avoid memory errors */
    }

    return status;
}
/**
 * @internal Release the zarr annotations of every variable in a group.
 *
 * For each variable, in order: the annotation of each of its attributes
 * is freed; when filter support is compiled in, its filter lists are
 * reclaimed; the annotation of its type is closed; its chunk cache and
 * its xarray list are freed if present; and finally the variable's own
 * annotation is freed and its pointer cleared.
 *
 * The results of reclaiming the filters and of closing the type are
 * deliberately ignored.
 *
 * @param grp Pointer to group info struct.
 *
 * @return ::NC_NOERR No error (this function has no failure path).
 * @author Dennis Heimbigner
 */
static int
zclose_vars(NC_GRP_INFO_T* grp)
{
    int status = NC_NOERR;
    int v;

    for(v = 0; v < ncindexsize(grp->vars); v++) {
        NC_VAR_INFO_T* curvar = (NC_VAR_INFO_T*)ncindexith(grp->vars, v);
        NCZ_VAR_INFO_T* zannot;
        int k;

        assert(curvar && curvar->format_var_info);
        zannot = curvar->format_var_info;

        /* Annotations of the variable's attributes */
        for(k = 0; k < ncindexsize(curvar->att); k++) {
            NC_ATT_INFO_T* vatt = (NC_ATT_INFO_T*)ncindexith(curvar->att, k);

            assert(vatt && vatt->format_att_info);
            nullfree(vatt->format_att_info);
            vatt->format_att_info = NULL; /* avoid memory errors */
        }

#ifdef ENABLE_NCZARR_FILTERS
        /* Reclaim filters */
        if(curvar->filters != NULL) {
            (void)NCZ_filter_freelists(curvar);
        }
        curvar->filters = NULL;
#endif

        /* Reclaim the type */
        if(curvar->type_info) {
            (void)zclose_type(curvar->type_info);
        }

        /* Reclaim the chunk cache */
        if(zannot->cache) {
            NCZ_free_chunk_cache(zannot->cache);
        }

        /* Reclaim xarray */
        if(zannot->xarray) {
            nclistfreeall(zannot->xarray);
        }

        /* Reclaim the variable's own annotation */
        nullfree(zannot);
        curvar->format_var_info = NULL; /* avoid memory errors */
    }

    return status;
}
/**
 * @internal Release the zarr annotation attached to each dimension
 * listed in a group (grp->dim).
 *
 * Every dimension is expected to carry a non-NULL format_dim_info; it is
 * freed and the pointer is cleared.
 *
 * @param grp Pointer to group info struct.
 *
 * @return ::NC_NOERR No error (this function has no failure path).
 * @author Dennis Heimbigner
 */
static int
zclose_dims(NC_GRP_INFO_T* grp)
{
    int status = NC_NOERR;
    int d;

    for(d = 0; d < ncindexsize(grp->dim); d++) {
        NC_DIM_INFO_T* curdim = (NC_DIM_INFO_T*)ncindexith(grp->dim, d);

        assert(curdim && curdim->format_dim_info);
        nullfree(curdim->format_dim_info);
        curdim->format_dim_info = NULL; /* avoid memory errors */
    }

    return status;
}
/**
 * @internal Close resources for a single type: free the zarr annotation
 * (format_type_info) and set the pointer to NULL afterwards.
 *
 * Because of type reference counters, this close can be called multiple
 * times on the same type. Clearing the pointer is what makes that safe:
 * a repeated call finds NULL and frees nothing, instead of freeing the
 * same block a second time.
 *
 * @param type Pointer to type struct; must not be NULL.
 *
 * @return ::NC_NOERR No error (this function has no failure path).
 * @author Dennis Heimbigner
 */
static int
zclose_type(NC_TYPE_INFO_T* type)
{
    int stat = NC_NOERR;

    assert(type != NULL);

    /* nullfree() does nothing for a NULL pointer, so an already-closed
     * type simply passes through here. */
    nullfree(type->format_type_info);
    type->format_type_info = NULL; /* avoid memory errors on a repeated close */

    return stat;
}
/**
 * @internal Close the zarr resources of every type listed in a group
 * (grp->type) by handing each one to zclose_type().
 * Warning: note that atomic types are not covered here; this
 * is only for user-defined types.
 *
 * Processing stops at the first type whose close reports an error.
 *
 * @param grp Pointer to group info struct.
 *
 * @return ::NC_NOERR No error.
 * @return Otherwise the error code returned by zclose_type().
 * @author Dennis Heimbigner
 */
static int
zclose_types(NC_GRP_INFO_T* grp)
{
    int status = NC_NOERR;
    int t;

    for(t = 0; t < ncindexsize(grp->type); t++) {
        NC_TYPE_INFO_T* utype = (NC_TYPE_INFO_T*)ncindexith(grp->type, t);

        status = zclose_type(utype);
        if(status) break;
    }

    return status;
}
/**
 * @internal Recursively flush/create all data for all vars.
 *
 * The vars of the given group are written first, via ncz_write_var();
 * after that the function calls itself on each child group. The first
 * error stops the traversal.
 *
 * @param grp Pointer to group info struct whose vars need to be written;
 * it must carry a non-NULL format_grp_info.
 *
 * @return ::NC_NOERR No error.
 * @return Otherwise the error code of the first write that failed.
 * @author Dennis Heimbigner
 */
static int
zwrite_vars(NC_GRP_INFO_T *grp)
{
    int status = NC_NOERR;
    int n;

    assert(grp && grp->format_grp_info != NULL);
    LOG((3, "%s: grp->name %s", __func__, grp->hdr.name));

    /* This group's own vars come first... */
    for(n = 0; n < ncindexsize(grp->vars); n++) {
        NC_VAR_INFO_T* curvar = (NC_VAR_INFO_T*)ncindexith(grp->vars, n);

        status = ncz_write_var(curvar);
        if(status) return status;
    }

    /* ...followed by the vars of each child group, recursively. */
    for(n = 0; n < ncindexsize(grp->children); n++) {
        NC_GRP_INFO_T* subgrp = (NC_GRP_INFO_T*)ncindexith(grp->children,n);

        status = zwrite_vars(subgrp);
        if(status) return status;
    }

    return status;
}