Merge pull request #2278 from DennisHeimbigner/jsonconvention.dmh

Allow the read/write of JSON-valued Zarr attributes.
This commit is contained in:
Ward Fisher 2022-05-17 13:25:21 -06:00 committed by GitHub
commit 6e8e1c3ece
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 270 additions and 74 deletions

View File

@ -8,6 +8,8 @@ This file contains a high-level description of this package's evolution. Release
## 4.8.2 - TBD
* [Enhancement] Allow the read/write of JSON-valued Zarr attributes to allow
for domain specific info such as used by GDAL/Zarr. See [Github #2278](https://github.com/Unidata/netcdf-c/pull/2278).
* [Enhancement] Turn on the XArray convention for NCZarr files by default. WARNING, this means that the mode should explicitly specify "nczarr" or "zarr" even if "xarray" or "noxarray" is specified. See [Github #2257](https://github.com/Unidata/netcdf-c/pull/2257).
* [Enhancement] Update the documentation to match the current filter capabilities. See [Github #2249](https://github.com/Unidata/netcdf-c/pull/2249).
* [Enhancement] Support installation of pre-built standard filters into user-specified location. See [Github #2318](https://github.com/Unidata/netcdf-c/pull/2318).

View File

@ -56,9 +56,12 @@ struct NCJconst {int bval; long long ival; double dval; char* sval;};
extern "C" {
#endif
/* Parse a JSON string */
/* Parse a string to NCjson*/
DLLEXPORT int NCJparse(const char* text, unsigned flags, NCjson** jsonp);
/* Parse a counted string to NCjson*/
DLLEXPORT int NCJparsen(size_t len, const char* text, unsigned flags, NCjson** jsonp);
/* Reclaim a JSON tree */
DLLEXPORT extern void NCJreclaim(NCjson* json);

View File

@ -94,6 +94,7 @@ static int NCJyytext(NCJparser*, char* start, size_t pdlen);
static void NCJreclaimArray(struct NCjlist*);
static void NCJreclaimDict(struct NCjlist*);
static int NCJunescape(NCJparser* parser);
static int unescape1(int c);
static int listappend(struct NCjlist* list, NCjson* element);
#ifndef NETCDF_JSON_H
@ -109,24 +110,28 @@ static int bytesappendc(NCJbuf* bufp, const char c);
int
NCJparse(const char* text, unsigned flags, NCjson** jsonp)
{
return NCJparsen(strlen(text),text,flags,jsonp);
}
int
NCJparsen(size_t len, const char* text, unsigned flags, NCjson** jsonp)
{
int stat = NCJ_OK;
size_t len;
NCJparser* parser = NULL;
NCjson* json = NULL;
/* Need at least 1 character of input */
if(text == NULL || text[0] == '\0')
if(len == 0 || text == NULL)
{stat = NCJTHROW(NCJ_ERR); goto done;}
if(jsonp == NULL) goto done;
parser = calloc(1,sizeof(NCJparser));
if(parser == NULL)
{stat = NCJTHROW(NCJ_ERR); goto done;}
len = strlen(text);
parser->text = (char*)malloc(len+1+1);
if(parser->text == NULL)
{stat = NCJTHROW(NCJ_ERR); goto done;}
strcpy(parser->text,text);
memcpy(parser->text,text,len);
parser->text[len] = '\0';
parser->text[len+1] = '\0';
parser->pos = &parser->text[0];
@ -334,16 +339,21 @@ NCJlex(NCJparser* parser)
c = *parser->pos;
if(c == '\0') {
token = NCJ_EOF;
} else if(c <= ' ' || c == '\177') {
} else if(c <= ' ' || c == '\177') {/* ignore whitespace */
parser->pos++;
continue; /* ignore whitespace */
continue;
} else if(c == NCJ_ESCAPE) {
parser->pos++;
c = *parser->pos;
*parser->pos = unescape1(c);
continue;
} else if(strchr(JSON_WORD, c) != NULL) {
start = parser->pos;
for(;;) {
c = *parser->pos++;
if(c == '\0' || strchr(JSON_WORD,c) == NULL) break; /* end of word */
}
/* Pushback c if not whitespace */
/* Pushback c */
parser->pos--;
count = ((parser->pos) - start);
if(NCJyytext(parser,start,count)) goto done;
@ -604,6 +614,21 @@ NCJunescape(NCJparser* parser)
return NCJTHROW(NCJ_OK);
}
/* Translate the character that follows a backslash into the character it
   stands for. Only b, f, n, r and t are mapped to control characters; every
   other input is handed back unchanged (technically not Json conformant). */
static int
unescape1(int c)
{
    if(c == 'b') return '\b';
    if(c == 'f') return '\f';
    if(c == 'n') return '\n';
    if(c == 'r') return '\r';
    if(c == 't') return '\t';
    return c; /* anything else stands for itself */
}
#ifdef NCJDEBUG
static char*
tokenname(int token)
@ -896,7 +921,7 @@ NCJunparseR(const NCjson* json, NCJbuf* buf, unsigned flags)
if(json->list.len > 0 && json->list.contents != NULL) {
int shortlist = 0;
for(i=0;!shortlist && i < json->list.len;i+=2) {
if(i > 0) bytesappendc(buf,NCJ_COMMA);
if(i > 0) {bytesappendc(buf,NCJ_COMMA);bytesappendc(buf,' ');};
NCJunparseR(json->list.contents[i],buf,flags); /* key */
bytesappendc(buf,NCJ_COLON);
bytesappendc(buf,' ');
@ -945,7 +970,7 @@ escape(const char* text, NCJbuf* buf)
case '\n': replace = 'n'; break;
case '\r': replace = 'r'; break;
case '\t': replace = 't'; break;
case NCJ_QUOTE: replace = '\''; break;
case NCJ_QUOTE: replace = '\"'; break;
case NCJ_ESCAPE: replace = '\\'; break;
default: break;
}

View File

@ -12,13 +12,17 @@
#undef FILLONCLOSE
/*mnemonics*/
#define DICTOPEN '{'
#define DICTCLOSE '}'
/* Forward */
static int ncz_collect_dims(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NCjson** jdimsp);
static int ncz_sync_var(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, int isclose);
static int ncz_jsonize_atts(NCindex* attlist, NCjson** jattrsp);
static int load_jatts(NCZMAP* map, NC_OBJ* container, int nczarrv1, NCjson** jattrsp, NClist** atypes);
static int zconvert(nc_type typeid, size_t typelen, void* dst, NCjson* src);
static int zconvert(nc_type typeid, size_t typelen, NCjson* src, void* dst);
static int computeattrinfo(const char* name, NClist* atypes, NCjson* values,
nc_type* typeidp, size_t* typelenp, size_t* lenp, void** datap);
static int parse_group_content(NCjson* jcontent, NClist* dimdefs, NClist* varnames, NClist* subgrps);
@ -37,6 +41,8 @@ static int computeattrdata(nc_type* typeidp, NCjson* values, size_t* typelenp, s
static int inferattrtype(NCjson* values, nc_type* typeidp);
static int mininttype(unsigned long long u64, int negative);
static int computedimrefs(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, int purezarr, int xarray, int ndims, NClist* dimnames, size64_t* shapes, NC_DIM_INFO_T** dims);
static int read_dict(NCjson* jdict, NCjson** jtextp);
static int write_dict(size_t len, const void* data, NCjson** jsonp);
/**************************************************/
/**************************************************/
@ -791,6 +797,7 @@ done:
Note that this does not push to the file.
Also note that attributes of length 1 are stored as singletons, not arrays.
This is to be more consistent with pure zarr.
Also implements the JSON dictionary convention.
@param attlist - [in] the attributes to dictify
@param jattrsp - [out] the json'ized att list
@return NC_NOERR
@ -800,7 +807,7 @@ static int
ncz_jsonize_atts(NCindex* attlist, NCjson** jattrsp)
{
int stat = NC_NOERR;
int i;
int i, isdict;
NCjson* jattrs = NULL;
NCjson* akey = NULL;
NCjson* jdata = NULL;
@ -810,9 +817,18 @@ ncz_jsonize_atts(NCindex* attlist, NCjson** jattrsp)
/* Iterate over the attribute list */
for(i=0;i<ncindexsize(attlist);i++) {
NC_ATT_INFO_T* att = (NC_ATT_INFO_T*)ncindexith(attlist,i);
isdict = 0;
/* Create the attribute dict value*/
if((stat = NCZ_stringconvert(att->nc_typeid,att->len,att->data,&jdata)))
goto done;
/* Candidate for the JSON dictionary convention: char-typed text that opens
   with DICTOPEN and closes with DICTCLOSE. A zero-length or data-less
   attribute has no first/last character to inspect, so it is ruled out
   before any indexing takes place. */
if(att->nc_typeid == NC_CHAR
   && att->len > 0 && att->data != NULL
   && ((char*)att->data)[0] == DICTOPEN
   && ((char*)att->data)[att->len-1] == DICTCLOSE) {
    /* Only treat it as a dictionary if the text actually parses as one;
       otherwise isdict stays 0 and the generic conversion is used. */
    if(write_dict(att->len,att->data,&jdata)==NC_NOERR) isdict=1;
}
if(!isdict) {
if((stat = NCZ_stringconvert(att->nc_typeid,att->len,att->data,&jdata)))
goto done;
}
if((stat = NCJinsert(jattrs,att->hdr.name,jdata))) goto done;
jdata = NULL;
}
@ -933,7 +949,7 @@ done:
/* Convert a json value to actual data values of an attribute. */
static int
zconvert(nc_type typeid, size_t typelen, void* dst0, NCjson* src)
zconvert(nc_type typeid, size_t typelen, NCjson* src, void* dst0)
{
int stat = NC_NOERR;
int i;
@ -1019,6 +1035,7 @@ computeattrdata(nc_type* typeidp, NCjson* values, size_t* typelenp, size_t* lenp
void* data = NULL;
size_t typelen;
nc_type typeid = NC_NAT;
NCjson* jtext = NULL;
int reclaimvalues = 0;
/* Get assumed type */
@ -1026,12 +1043,20 @@ computeattrdata(nc_type* typeidp, NCjson* values, size_t* typelenp, size_t* lenp
if(typeid == NC_NAT) if((stat = inferattrtype(values,&typeid))) goto done;
if(typeid == NC_NAT) {stat = NC_EBADTYPE; goto done;}
if((stat = NC4_inq_atomic_type(typeid, NULL, &typelen)))
goto done;
/* Collect the length of the attribute; might be a singleton */
switch (NCJsort(values)) {
case NCJ_DICT: stat = NC_ENCZARR; goto done;
case NCJ_ARRAY:
count = NCJlength(values);
break;
case NCJ_DICT:
/* Apply the JSON dictionary convention and convert to string */
if((stat = read_dict(values,&jtext))) goto done;
values = jtext; jtext = NULL;
reclaimvalues = 1;
/* fall thru */
case NCJ_STRING: /* requires special handling as an array of characters; also look out for empty string */
if(typeid == NC_CHAR) {
count = strlen(NCJstring(values));
@ -1044,10 +1069,8 @@ computeattrdata(nc_type* typeidp, NCjson* values, size_t* typelenp, size_t* lenp
break;
}
if(count > 0) {
if(count > 0 && data == NULL) {
/* Allocate data space */
if((stat = NC4_inq_atomic_type(typeid, NULL, &typelen)))
goto done;
if(typeid == NC_CHAR)
data = malloc(typelen*(count+1));
else
@ -1055,7 +1078,7 @@ computeattrdata(nc_type* typeidp, NCjson* values, size_t* typelenp, size_t* lenp
if(data == NULL)
{stat = NC_ENOMEM; goto done;}
/* convert to target type */
if((stat = zconvert(typeid, typelen, data, values)))
if((stat = zconvert(typeid, typelen, values, data)))
goto done;
}
if(lenp) *lenp = count;
@ -1094,7 +1117,9 @@ inferattrtype(NCjson* value, nc_type* typeidp)
case NCJ_NULL:
typeid = NC_CHAR;
return NC_NOERR;
case NCJ_DICT: /* fall thru */
case NCJ_DICT:
typeid = NC_CHAR;
goto done;
case NCJ_UNDEF:
return NC_EINVAL;
default: /* atomic */
@ -2302,42 +2327,48 @@ done:
return THROW(stat);
}
#if 0
Not currently used
Special compatibility case:
if the value of the attribute is a dictionary,
or an array with non-atomic values, then
then stringify it and pretend it is of char type.
/* Return 1 if this json is not an
atomic value or an array of atomic values.
That is, it does not look like valid
attribute data.
/**
Implement the JSON convention for dictionaries.
Reading: If the value of the attribute is a dictionary, then stringify
it as the value and make the attribute be of type "char".
Writing: if the attribute is of type char and looks like a JSON dictionary,
then parse it as JSON and use that as its value in .zattrs.
*/
static int
iscomplexjson(NCjson* j)
read_dict(NCjson* jdict, NCjson** jtextp)
{
int i;
switch(NCJsort(j)) {
case NCJ_ARRAY:
/* verify that the elements of the array are not complex */
for(i=0;i<NCJlength(j);i++) {
switch (NCJith(j,NCJsort(i)))) {
case NCJ_DICT:
case NCJ_ARRAY:
case NCJ_UNDEF:
case NCJ_NULL:
return 1;
default: break;
}
}
return 0;
case NCJ_DICT:
case NCJ_UNDEF:
case NCJ_NULL:
break;
default:
return 0;
}
return 1;
int stat = NC_NOERR;
NCjson* jtext = NULL;
char* text = NULL;
if(jdict == NULL) {stat = NC_EINVAL; goto done;}
if(NCJsort(jdict) != NCJ_DICT) {stat = NC_EINVAL; goto done;}
if(NCJunparse(jdict,0,&text)) {stat = NC_EINVAL; goto done;}
if(NCJnewstring(NCJ_STRING,text,&jtext)) {stat = NC_EINVAL; goto done;}
*jtextp = jtext; jtext = NULL;
done:
NCJreclaim(jtext);
nullfree(text);
return stat;
}
#endif
/* Parse len bytes of attribute text as JSON and accept the result only if
   its top level is a dictionary.
   On success the parsed tree is stored in *jsonp and NC_NOERR is returned;
   the tree is then no longer reclaimed here. On a parse failure or a
   non-dictionary result, NC_EINVAL is returned, *jsonp is left untouched
   and whatever was parsed is reclaimed. */
static int
write_dict(size_t len, const void* data, NCjson** jsonp)
{
    int stat;
    NCjson* parsed = NULL;

    assert(jsonp != NULL);
    if(NCJparsen(len,(char*)data,0,&parsed) || NCJsort(parsed) != NCJ_DICT) {
        stat = NC_EINVAL;
    } else {
        stat = NC_NOERR;
        *jsonp = parsed;
        parsed = NULL; /* handed off; keep the reclaim below from freeing it */
    }
    NCJreclaim(parsed);
    return stat;
}

View File

@ -31,9 +31,11 @@ main() {
if(fwrite(TRUNCATED_FILE_CONTENT, sizeof(char), sizeof(TRUNCATED_FILE_CONTENT), fp) != sizeof(TRUNCATED_FILE_CONTENT)) ERR;
fclose(fp);
int ncid, stat;
stat = nc_open(FILE_NAME, 0, &ncid);
if (stat != NC_EHDFERR && stat != NC_ENOFILTER && stat != NC_ENOTNC) ERR;
}
{

View File

@ -8,7 +8,6 @@
if test "x$srcdir" = x ; then srcdir=`pwd`; fi
. ../test_common.sh
set -x
set -e
echo ""

View File

@ -4,7 +4,6 @@ if test "x$srcdir" = x ; then srcdir=`pwd`; fi
. ../test_common.sh
# This shell script tests the output from several previous tests.
set -x
set -e
echo ""

View File

@ -73,7 +73,7 @@ IF(ENABLE_TESTS)
BUILD_BIN_TEST(zmapio ${COMMONSRC})
TARGET_INCLUDE_DIRECTORIES(zmapio PUBLIC ../libnczarr)
BUILD_BIN_TEST(zhex)
BUILD_BIN_TEST(zisjson)
BUILD_BIN_TEST(zisjson ${COMMONSRC})
TARGET_INCLUDE_DIRECTORIES(zisjson PUBLIC ../libnczarr)
BUILD_BIN_TEST(zs3parse ${COMMONSRC})
TARGET_INCLUDE_DIRECTORIES(zs3parse PUBLIC ../libnczarr)
@ -108,6 +108,7 @@ IF(ENABLE_TESTS)
add_sh_test(nczarr_test run_interop)
add_sh_test(nczarr_test run_misc)
add_sh_test(nczarr_test run_nczarr_fill)
add_sh_test(nczarr_test run_jsonconvention)
BUILD_BIN_TEST(test_quantize ${TSTCOMMONSRC})
add_sh_test(nczarr_test run_quantize)

View File

@ -61,6 +61,7 @@ TESTS += run_purezarr.sh
TESTS += run_interop.sh
TESTS += run_misc.sh
TESTS += run_nczarr_fill.sh
TESTS += run_jsonconvention.sh
endif
@ -127,7 +128,7 @@ run_ut_map.sh run_ut_mapapi.sh run_ut_misc.sh run_ut_chunk.sh run_ncgen4.sh \
run_nccopyz.sh run_fillonlyz.sh run_chunkcases.sh test_nczarr.sh run_perf_chunks1.sh run_s3_cleanup.sh \
run_purezarr.sh run_interop.sh run_misc.sh \
run_filter.sh run_specific_filters.sh \
run_newformat.sh run_nczarr_fill.sh run_quantize.sh
run_newformat.sh run_nczarr_fill.sh run_quantize.sh run_jsonconvention.sh
EXTRA_DIST += \
ref_ut_map_create.cdl ref_ut_map_writedata.cdl ref_ut_map_writemeta2.cdl ref_ut_map_writemeta.cdl \
@ -147,7 +148,7 @@ ref_bzip2.cdl ref_filtered.cdl ref_multi.cdl \
ref_any.cdl ref_oldformat.cdl ref_oldformat.zip ref_newformatpure.cdl \
ref_quotes.zip ref_quotes.cdl \
ref_groups.h5 ref_byte.zarr.zip ref_byte_fill_value_null.zarr.zip \
ref_groups_regular.cdl ref_byte.cdl ref_byte_fill_value_null.cdl
ref_groups_regular.cdl ref_byte.cdl ref_byte_fill_value_null.cdl ref_jsonconvention.cdl
# Interoperability files
EXTRA_DIST += ref_power_901_constants.zip ref_power_901_constants.cdl ref_quotes.zip ref_quotes.cdl

View File

@ -0,0 +1,13 @@
netcdf tmp_jsonconvention {
dimensions:
d1 = 1 ;
variables:
int v(d1) ;
v:varconvention = "{\n\"key1\": [1,2,3], \"key2\": {\"key3\": \"abc\"}}" ;
// global attributes:
:grpconvention = "{\"key1\": [1,2,3], \n\"key2\": {\"key3\": \"abc\"}}" ;
data:
v = _ ;
}

View File

@ -0,0 +1,31 @@
#!/bin/sh
if test "x$srcdir" = x ; then srcdir=`pwd`; fi
. ../test_common.sh
. "$srcdir/test_nczarr.sh"
# This shell script tests support for:
# read/write using json convention
set -e
# Round-trip check of the JSON dictionary convention for one storage kind.
#   $1 - storage kind appended to the mode string (callers pass file, zip, s3)
# Builds a dataset from ref_jsonconvention.cdl with ${NCGEN}, dumps it back
# with ${NCDUMP}, and diffs the dump against the reference after the literal
# two-character sequence \n has been stripped from the reference.
# NOTE(review): $file and $fileurl are presumably set by fileargs, which is
# defined outside this script -- confirm.
# Path-like expansions are quoted so that a directory containing spaces or
# glob characters is not word-split; ${NCGEN}/${NCDUMP} stay unquoted because
# they are used as commands.
testcase() {
    zext="$1"
    echo "*** Test: write then read using json convention"
    fileargs tmp_jsonconvention "mode=nczarr,$zext"
    deletemap "$zext" "$file"
    ${NCGEN} -4 -b -o "$fileurl" "${srcdir}/ref_jsonconvention.cdl"
    ${NCDUMP} "$fileurl" > "tmp_jsonconvention_${zext}.cdl"
    # remove '\n' from ref file before comparing
    rm -f tmp_jsonconvention.cdl
    sed -e 's|\\n||g' < "${srcdir}/ref_jsonconvention.cdl" > tmp_jsonconvention.cdl
    diff -b tmp_jsonconvention.cdl "tmp_jsonconvention_${zext}.cdl"
}
testcase file
if test "x$FEATURE_NCZARR_ZIP" = xyes ; then testcase zip; fi
if test "x$FEATURE_S3TESTS" = xyes ; then testcase s3; fi
exit 0

View File

@ -7,36 +7,122 @@
Output 1 or 0.
*/
#include "config.h"
#include "stdlib.h"
#include "stdio.h"
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#include "stdlib.h"
#include "stdio.h"
#include "string.h"
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#if defined(_WIN32) && !defined(__MINGW32__)
#include "XGetopt.h"
#else
#include <getopt.h>
#endif
#include "netcdf.h"
#include "nclist.h"
#include "ncjson.h"
#define MAXREAD 8192
/* Command line options */
/* Filled in by main() from the getopt loop and consulted after parsing. */
struct Jsonpptions {
int trace; /* non-zero when -t was given: print the parsed JSON tree via jsontrace() */
} jsonoptions;
/* Give the printable name of an NCJ_XXX sort code.
   Returns a string literal naming the sort, or "?" when the code is not one
   of the seven sorts listed in the table below. */
static const char*
sortname(int thesort)
{
    static const struct {int sort; const char* label;} names[] = {
        {NCJ_INT,     "NCJ_INT"},
        {NCJ_DOUBLE,  "NCJ_DOUBLE"},
        {NCJ_BOOLEAN, "NCJ_BOOLEAN"},
        {NCJ_STRING,  "NCJ_STRING"},
        {NCJ_DICT,    "NCJ_DICT"},
        {NCJ_ARRAY,   "NCJ_ARRAY"},
        {NCJ_NULL,    "NCJ_NULL"},
    };
    size_t k;

    for(k = 0; k < sizeof(names)/sizeof(names[0]); k++) {
        if(names[k].sort == thesort) return names[k].label;
    }
    return "?";
}
/* Recursively print a JSON tree to stdout, one node per line.
   Each node is printed as "[depth] sort=NAME"; the atomic sorts (int,
   double, boolean, string) additionally show their text between |...|.
   Array elements and dictionary key/value pairs are traced at depth+1.
   A NULL node prints nothing; a sort not handled below prints its header
   without a trailing newline. */
static void
jsontrace(NCjson* json, int depth)
{
    int k;
    int sort;

    if(json == NULL) return;

    sort = NCJsort(json);
    printf("[%d] sort=%s",depth,sortname(sort));

    if(sort == NCJ_INT || sort == NCJ_DOUBLE
       || sort == NCJ_BOOLEAN || sort == NCJ_STRING) {
        printf(" string=|%s|\n",NCJstring(json));
    } else if(sort == NCJ_NULL) {
        printf("\n");
    } else if(sort == NCJ_ARRAY) {
        printf("\n");
        k = 0;
        while(k < NCJlength(json)) {
            jsontrace(NCJith(json,k),depth+1);
            k++;
        }
    } else if(sort == NCJ_DICT) {
        printf("\n");
        /* contents alternate key, value, key, value, ... */
        k = 0;
        while(k < NCJlength(json)) {
            jsontrace(NCJith(json,k),depth+1);
            jsontrace(NCJith(json,k+1),depth+1);
            k += 2;
        }
    }
}
int
main(int argc, char** argv)
{
int stat = NC_NOERR;
char text[MAXREAD+1];
NCjson* json = NULL;
int i, red;
int i, red, c;
FILE* f = NULL;
if(argc > 1) {
/* use argv[1] as input */
f = fopen(argv[1],"r");
if(f == NULL) {fprintf(stderr,"No such file: %s\n",argv[1]); exit(1);}
} else
f = stdin;
nc_initialize();
memset((void*)&jsonoptions,0,sizeof(jsonoptions));
while ((c = getopt(argc, argv, "t")) != EOF) {
switch(c) {
case 't': jsonoptions.trace = 1; break;
case '?':
fprintf(stderr,"unknown option\n");
exit(1);
}
}
/* get file argument */
argc -= optind;
argv += optind;
if (argc > 1) {
fprintf(stderr, "zisjson: only one input file argument permitted\n");
exit(1);
}
/* Pick the input stream. argc/argv were already advanced past the options
   (argc -= optind; argv += optind), so the file operand, if any, is argv[0]. */
if (argc == 0)
    f = stdin;
else {
    /* use argv[0] as input */
    f = fopen(argv[0],"r");
    /* Report the name that was actually opened: argv[1] here would be the
       NULL terminator (or an unrelated argument) after the shift above. */
    if(f == NULL) {fprintf(stderr,"No such file: %s\n",argv[0]); exit(1);}
}
/* Read json from stdin */
for(i=0;;i++) {
@ -54,9 +140,12 @@ main(int argc, char** argv)
stat = NC_EEMPTY;
} else {
stat = NCJparse(text,0,&json);
NCJreclaim(json);
if(!stat) {
if(jsonoptions.trace) jsontrace(json,0);
NCJreclaim(json);
}
}
printf("%d",(stat==NC_NOERR?1:0)); /* parse success|failure */
printf("%d",(stat?0:1)); /* parse success|failure */
if(f != stdin) fclose(f);
return 0;
}