Addendum: This PR has been extended to include

interoperability fixes. We were given a Zarr format dataset
stored as a directory+file tree. This dataset uses the XArray
conventions and was generated by some non-Unidata Zarr implementation.
In attempting to process it with NCZarr, several interoperability
problems were discovered and fixed. This gives us more confidence
that NCZarr -- using pure zarr -- can interoperate with other
Zarr implementations.

Specific changes:
* Add test nczarr_test/run_interop.sh
* Support attributes with single value not enclosed in JSON array tags.
* Add mode inferencing and use it in nczarr_test/run_purezarr.sh
* Reduce size of tst_err_enddef.nc because it is more than 3 GB.
This commit is contained in:
Dennis Heimbigner 2021-04-02 18:39:50 -06:00
parent 727dd6861e
commit 0454d8e235
10 changed files with 210 additions and 10 deletions

View File

@ -126,6 +126,16 @@ static const struct MACRODEF {
{NULL,NULL,NULL}
};
/*
 * Mode inferences: when the mode named by `key` is present in the "mode"
 * fragment value, the mode named by `inference` is implied as well.
 * The table is terminated by a {NULL,NULL} sentinel entry.
 * Both members point at string literals, hence const.
 */
static const struct MODEINFER {
    const char* key;       /* mode that triggers the inference */
    const char* inference; /* mode implied by key */
} modeinferences[] = {
{"zarr","nczarr"},
{"xarray","zarr"},
{NULL,NULL}
};
/* Map FORMATX to readability to get magic number */
static struct Readable {
int impl;
@ -454,6 +464,74 @@ done:
return check(stat);
}
/*
 * Process mode flag inferences.
 *
 * fraglenv is walked as a flat list of alternating key/value strings
 * (keys at even indices, the matching value at the following index).
 * The value of the first key equal to "mode" (case-insensitive) is split
 * on commas; every mode implied by the modeinferences table is appended,
 * repeating until no new mode is added and never adding a mode that is
 * already present. The rebuilt string then replaces the old value in
 * fraglenv and the old value is freed.
 *
 * Returns NC_NOERR on success or when there is nothing to do (NULL/empty
 * list, no "mode" key, or a "mode" key with no value), NC_ENOMEM when an
 * allocation fails, otherwise the error reported by parseonchar.
 * On any error fraglenv is left unmodified.
 */
static int
processinferences(NClist* fraglenv)
{
    int stat = NC_NOERR;
    const struct MODEINFER* inferences = NULL;
    NClist* modes = NULL;
    int inferred = 0;
    int found = 0;
    size_t i;
    size_t pos = 0;
    char* modeval = NULL;
    char* newmodeval = NULL;

    if(fraglenv == NULL || nclistlength(fraglenv) == 0) goto done;

    /* Get "mode" entry */
    for(i=0;i<nclistlength(fraglenv);i+=2) {
        const char* key = nclistget(fraglenv,i);
        if(key != NULL && strcasecmp(key,"mode")==0) {
            pos = i;
            found = 1;
            break;
        }
    }
    if(!found)
        goto done; /* no modes defined */

    modeval = (char*)nclistget(fraglenv,pos+1);
    if(modeval == NULL)
        goto done; /* "mode" key without a value: nothing to infer from */

    /* Get the mode as list, split on commas */
    modes = nclistnew();
    if(modes == NULL)
        {stat = NC_ENOMEM; goto done;}
    if((stat=parseonchar(modeval,',',modes))) goto done;

    /* Repeatedly walk the mode list until no more new inferences */
    do {
        inferred = 0;
        /* The length is re-read every iteration, so modes appended below
           are themselves examined for further inferences. */
        for(i=0;i<nclistlength(modes);i++) {
            const char* mode = nclistget(modes,i);
            for(inferences=modeinferences;inferences->key;inferences++) {
                size_t j;
                int exists = 0;
                char* implied = NULL;

                if(strcasecmp(inferences->key,mode)!=0) continue;
                for(j=0;j<nclistlength(modes);j++) {
                    const char* candidate = nclistget(modes,j);
                    if(strcasecmp(candidate,inferences->inference)==0)
                        {exists = 1; break;}
                }
                if(exists) continue;
                /* append the inferred mode if not already present;
                   never push a NULL, later passes would dereference it */
                if((implied = strdup(inferences->inference)) == NULL)
                    {stat = NC_ENOMEM; goto done;}
                nclistpush(modes,implied);
                inferred = 1;
            }
        }
    } while(inferred);

    /* Store new mode value; fraglenv takes over newmodeval */
    if((newmodeval = list2string(modes))== NULL)
        {stat = NC_ENOMEM; goto done;}
    nclistset(fraglenv,pos+1,newmodeval);
    nullfree(modeval);

done:
    nclistfreeall(modes);
    return check(stat);
}
static int
mergekey(NClist** valuesp)
{
@ -715,6 +793,12 @@ NC_infermodel(const char* path, int* omodep, int iscreate, int useparallel, void
printlist(fraglenv,"processmacros");
#endif
/* Phase 2a: Expand mode inferences and add to fraglenv */
if((stat = processinferences(fraglenv))) goto done;
#ifdef DEBUG
printlist(fraglenv,"processinferences");
#endif
/* Phase 3: coalesce duplicate fragment keys and remove duplicate values */
if((stat = cleanfragments(&fraglenv))) goto done;
#ifdef DEBUG

View File

@ -662,7 +662,7 @@ NCJappend(NCjson* object, NCjson* value)
/**
 * Fetch the i'th element of a JSON array node.
 *
 * @param object  node to index; must be non-NULL and of sort NCJ_ARRAY
 * @param i       position within the node's contents list
 * @param valuep  if non-NULL, receives whatever nclistget yields for index i
 * @return NC_NOERR on success; NC_EINTERNAL if object is NULL or not an array
 */
int
NCJarrayith(NCjson* object, size_t i, NCjson** valuep)
{
    /* Reject everything that is not an array; in particular a dict must
       not be indexed positionally. */
    if(object == NULL || object->sort != NCJ_ARRAY)
        return NC_EINTERNAL;
    if(valuep) *valuep = nclistget(object->contents,i);
    return NC_NOERR;
}

View File

@ -897,7 +897,7 @@ computeattrdata(nc_type* typeidp, NCjson* values, size_t* lenp, void** datap)
if(typeid == NC_NAT) inferattrtype(values,&typeid);
if(typeid == NC_NAT) {stat = NC_EBADTYPE; goto done;}
/* Collect the length of the attribute */
/* Collect the length of the attribute; might be a singleton */
switch (values->sort) {
case NCJ_DICT: stat = NC_EINTERNAL; goto done;
case NCJ_ARRAY:
@ -927,7 +927,8 @@ computeattrdata(nc_type* typeidp, NCjson* values, size_t* lenp, void** datap)
if(lenp) *lenp = datalen;
if(datap) {*datap = data; data = NULL;}
if(typeidp) *typeidp = typeid; /* return possibly inferred type */
done:
nullfree(data);
return THROW(stat);
@ -961,9 +962,24 @@ inferattrtype(NCjson* values, nc_type* typeidp)
case NCJ_BOOLEAN:
typeid = NC_UBYTE;
break;
default: return NC_EINVAL;
default: return NC_EINTERNAL;
}
break;
/* Might be a singleton */
case NCJ_INT:
if(values->value[0] == '-') {
sscanf(values->value,"%lld",&i64);
u64 = (unsigned long long)i64;
} else
sscanf(values->value,"%llu",&u64);
typeid = mininttype(u64);
break;
case NCJ_DOUBLE:
typeid = NC_DOUBLE;
break;
case NCJ_BOOLEAN:
typeid = NC_UBYTE;
break;
case NCJ_STRING: /* requires special handling as an array of characters */
typeid = NC_CHAR;
break;

View File

@ -42,8 +42,8 @@ int main(int argc, char** argv)
err = nc_set_fill(ncid, NC_NOFILL, NULL); CHECK_ERR
err = nc_def_dim(ncid, "X", 5, &dimid[0]); CHECK_ERR
err = nc_def_dim(ncid, "YY", 32000, &dimid[1]); CHECK_ERR
err = nc_def_dim(ncid, "XX", 32000, &dimid[2]); CHECK_ERR
err = nc_def_dim(ncid, "YY", 320, &dimid[1]); CHECK_ERR
err = nc_def_dim(ncid, "XX", 320, &dimid[2]); CHECK_ERR
err = nc_def_var(ncid, "var", NC_INT, 1, dimid, &varid); CHECK_ERR
err = nc_def_var(ncid, "var_big", NC_FLOAT, 2, dimid+1, &varid); CHECK_ERR

View File

@ -83,6 +83,7 @@ IF(ENABLE_TESTS)
ENDIF()
add_sh_test(nczarr_test run_ncgen4)
add_sh_test(nczarr_test run_purezarr)
add_sh_test(nczarr_test run_interop)
BUILD_BIN_TEST(tst_chunkcases ${TSTCOMMONSRC})
TARGET_INCLUDE_DIRECTORIES(tst_chunkcases PUBLIC ../libnczarr)

View File

@ -51,6 +51,7 @@ endif
TESTS += run_ncgen4.sh
TESTS += run_purezarr.sh
TESTS += run_interop.sh
check_PROGRAMS += tst_chunkcases
tst_chunkcases_SOURCES = tst_chunkcases.c ${tstcommonsrc}
@ -98,7 +99,7 @@ ncdumpchunks_SOURCES = ncdumpchunks.c
EXTRA_DIST = CMakeLists.txt \
run_ut_map.sh run_ut_mapapi.sh run_ut_misc.sh run_ut_chunk.sh run_ncgen4.sh \
run_nccopyz.sh run_fillonlyz.sh run_chunkcases.sh test_nczarr.sh run_perf_chunks1.sh run_s3_cleanup.sh \
run_purezarr.sh \
run_purezarr.sh run_interop.sh \
ref_ut_map_create.cdl ref_ut_map_writedata.cdl ref_ut_map_writemeta2.cdl ref_ut_map_writemeta.cdl \
ref_ut_map_readmeta.txt ref_ut_map_readmeta2.txt ref_ut_map_search.txt \
ref_ut_mapapi_create.cdl ref_ut_mapapi_data.cdl ref_ut_mapapi_meta.cdl ref_ut_mapapi_search.txt \
@ -113,8 +114,12 @@ ref_misc1.cdl ref_misc1.dmp \
ref_avail1.cdl ref_avail1.dmp ref_avail1.txt \
ref_xarray.cdl ref_purezarr.cdl ref_purezarr_base.cdl
# Interoperability files
EXTRA_DIST += power_901_constants.zip ref_power_901_constants.cdl
# Temporary files produced by the test scripts (each glob listed once)
CLEANFILES = ut_*.txt ut*.cdl tmp*.nc tmp*.cdl tmp*.txt tmp*.dmp tmp*.zip
# Remove directories
clean-local:
rm -fr tmp*.file results.file results.s3 results.zip
rm -fr power_901_constants.file

Binary file not shown.

View File

@ -0,0 +1,38 @@
netcdf power_901_constants {
dimensions:
lat = 361 ;
lon = 576 ;
variables:
float FROCEAN(lat, lon) ;
FROCEAN:fmissing_value = 999999986991104. ;
FROCEAN:long_name = "fraction_of_ocean" ;
FROCEAN:standard_name = "fraction_of_ocean" ;
FROCEAN:units = "1" ;
FROCEAN:valid_range = -999999986991104., 999999986991104. ;
FROCEAN:vmax = 999999986991104. ;
FROCEAN:vmin = -999999986991104. ;
float FRLAND(lat, lon) ;
FRLAND:fmissing_value = 999999986991104. ;
FRLAND:long_name = "fraction_of_land" ;
FRLAND:standard_name = "fraction_of_land" ;
FRLAND:units = "1" ;
FRLAND:valid_range = -999999986991104., 999999986991104. ;
FRLAND:vmax = 999999986991104. ;
FRLAND:vmin = -999999986991104. ;
float FRLAKE(lat, lon) ;
FRLAKE:fmissing_value = 999999986991104. ;
FRLAKE:long_name = "fraction_of_lake" ;
FRLAKE:standard_name = "fraction_of_lake" ;
FRLAKE:units = "1" ;
FRLAKE:valid_range = -999999986991104., 999999986991104. ;
FRLAKE:vmax = 999999986991104. ;
FRLAKE:vmin = -999999986991104. ;
float FRLANDICE(lat, lon) ;
FRLANDICE:fmissing_value = 999999986991104. ;
FRLANDICE:long_name = "fraction_of_land_ice" ;
FRLANDICE:standard_name = "fraction_of_land_ice" ;
FRLANDICE:units = "1" ;
FRLANDICE:valid_range = -999999986991104., 999999986991104. ;
FRLANDICE:vmax = 999999986991104. ;
FRLANDICE:vmin = -999999986991104. ;
}

57
nczarr_test/run_interop.sh Executable file
View File

@ -0,0 +1,57 @@
#!/bin/sh
if test "x$srcdir" = x ; then srcdir=`pwd`; fi
. ../test_common.sh
. "$srcdir/test_nczarr.sh"
# This shell script tests compatibility between
# this implementation and other implementations
# by means of files constructed by that other implementation
set -e
# Dump a dataset using the "file" storage kind and compare the result with
# its reference CDL (${srcdir}/ref_<name>.cdl).
#   $1: dataset base name
#   $2: mode value placed in front of the storage kind in the "mode=" fragment
#   $3: optional; "metaonly" passes -h to ncdump
# Relies on fileargs (sourced elsewhere) to provide $fileurl.
testcasefile() {
  zext=file
  ref=$1
  mode=$2
  # Reset on every call: flags is a global, so without this a "-h" set by
  # an earlier metaonly call would leak into later calls.
  flags=""
  if test "x$3" = xmetaonly ; then flags="-h"; fi
  fileargs ${srcdir}/$ref "mode=$mode,$zext"
  rm -f tmp_${ref}_${zext}.cdl
  ${NCDUMP} $flags $fileurl > tmp_${ref}_${zext}.cdl
  diff -b ${srcdir}/ref_${ref}.cdl tmp_${ref}_${zext}.cdl
}
# Dump a dataset using the "zip" storage kind and compare the result with
# its reference CDL (${srcdir}/ref_<name>.cdl).
#   $1: dataset base name
#   $2: mode value placed in front of the storage kind in the "mode=" fragment
#   $3: optional; "metaonly" passes -h to ncdump
# Relies on fileargs (sourced elsewhere) to provide $fileurl.
testcasezip() {
  zext=zip
  ref=$1
  mode=$2
  # Reset on every call: flags is a global, so without this a "-h" set by
  # an earlier metaonly call would leak into later calls.
  flags=""
  if test "x$3" = xmetaonly ; then flags="-h"; fi
  fileargs ${srcdir}/$ref "mode=$mode,$zext"
  rm -f tmp_${ref}_${zext}.cdl
  ${NCDUMP} $flags $fileurl > tmp_${ref}_${zext}.cdl
  diff -b ${srcdir}/ref_${ref}.cdl tmp_${ref}_${zext}.cdl
}
# Run the interoperability test cases for one storage kind.
#   $1: storage kind; "file" and "zip" are handled, anything else prints
#       a message and exits with status 1.
testallcases() {
  zext=$1
  if test "x$zext" = xfile ; then
    # The directory-tree form has to be unpacked from the zip first;
    # start from a clean slate so mv cannot collide with old output.
    rm -fr power_901_constants power_901_constants.file
    unzip ${srcdir}/power_901_constants.zip > /dev/null
    mv power_901_constants power_901_constants.file
    testcasefile power_901_constants xarray metaonly
  elif test "x$zext" = xzip ; then
    testcasezip power_901_constants xarray metaonly
  else
    echo "unimplemented kind: $1"
    exit 1
  fi
}
# Driver: the "file" case is currently disabled.
#testallcases file
# The zip case runs only when FEATURE_NCZARR_ZIP is "yes".
if test "x$FEATURE_NCZARR_ZIP" = xyes
then
  testallcases zip
fi
#No examples yet: if test "x$FEATURE_S3TESTS" = xyes ; then testallcases s3; fi
exit 0

View File

@ -15,19 +15,18 @@ testcase() {
zext=$1
echo "*** Test: pure zarr write; format=$zext"
fileargs tmp_purezarr "zarr&mode=$zext"
fileargs tmp_purezarr "mode=zarr,$zext"
deletemap $zext $file
${NCGEN} -4 -b -o "$fileurl" $srcdir/ref_purezarr_base.cdl
${NCDUMP} $fileurl > tmp_purezarr_${zext}.cdl
diff -b ${srcdir}/ref_purezarr.cdl tmp_purezarr_${zext}.cdl
echo "*** Test: xarray zarr write; format=$zext"
fileargs tmp_xarray "xarray&mode=$zext"
fileargs tmp_xarray "mode=xarray,$zext"
deletemap $zext $file
${NCGEN} -4 -b -o "$fileurl" $srcdir/ref_purezarr_base.cdl
${NCDUMP} $fileurl > tmp_xarray_${zext}.cdl
diff -b ${srcdir}/ref_xarray.cdl tmp_xarray_${zext}.cdl
}
testcase file