mirror of
https://github.com/Unidata/netcdf-c.git
synced 2025-01-30 16:10:44 +08:00
Addendum: This PR has been extended to include
interoperability fixes. We were given a Zarr format dataset stored as a directory+file tree. This dataset uses the XArray conventions and was generated by some non-Unidata Zarr implementation. In attempting to process it with NCZarr, several interoperability problems were discovered and fixed. This gives us more confidence that NCZarr -- using pure zarr -- can interoperate with other Zarr implementations. Specific changes: * Add test nczarr_test/run_interop.sh * Support attributes with single value not enclosed in JSON array tags. * Add mode inferencing and use it in nczarr_test/run_purezarr.sh * Reduce size of tst_err_enddef.nc because it is more than 3 GB.
This commit is contained in:
parent
727dd6861e
commit
0454d8e235
@ -126,6 +126,16 @@ static const struct MACRODEF {
|
||||
{NULL,NULL,NULL}
|
||||
};
|
||||
|
||||
/*
 * Mode inferences.
 * Each entry states that when the mode named by `key` is present,
 * the mode named by `inference` is to be treated as present too.
 * The table is terminated by a {NULL,NULL} entry.
 */
static const struct MODEINFER {
    const char* key;       /* mode name that triggers the inference */
    const char* inference; /* mode name implied by key */
} modeinferences[] = {
{"zarr","nczarr"},
{"xarray","zarr"},
{NULL,NULL}
};
|
||||
|
||||
/* Map FORMATX to readability to get magic number */
|
||||
static struct Readable {
|
||||
int impl;
|
||||
@ -454,6 +464,74 @@ done:
|
||||
return check(stat);
|
||||
}
|
||||
|
||||
/* Process mode flag inferences */
|
||||
static int
|
||||
processinferences(NClist* fraglenv)
|
||||
{
|
||||
int stat = NC_NOERR;
|
||||
const struct MODEINFER* inferences = NULL;
|
||||
NClist* modes = NULL;
|
||||
int inferred,i,pos = -1;
|
||||
char* modeval = NULL;
|
||||
char* newmodeval = NULL;
|
||||
|
||||
if(fraglenv == NULL || nclistlength(fraglenv) == 0) goto done;
|
||||
|
||||
/* Get "mode" entry */
|
||||
for(i=0;i<nclistlength(fraglenv);i+=2) {
|
||||
char* key = NULL;
|
||||
key = nclistget(fraglenv,i);
|
||||
if(strcasecmp(key,"mode")==0) {
|
||||
pos = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if(pos < 0)
|
||||
goto done; /* no modes defined */
|
||||
|
||||
/* Get the mode as list */
|
||||
modes = nclistnew();
|
||||
modeval = (char*)nclistget(fraglenv,pos+1);
|
||||
/* split on commas */
|
||||
if((stat=parseonchar(modeval,',',modes))) goto done;
|
||||
|
||||
/* Repeatedly walk the mode list until no more new inferences */
|
||||
do {
|
||||
inferred = 0;
|
||||
for(i=0;i<nclistlength(modes);i++) {
|
||||
const char* mode = nclistget(modes,i);
|
||||
for(inferences=modeinferences;inferences->key;inferences++) {
|
||||
if(strcasecmp(inferences->key,mode)==0) {
|
||||
int j;
|
||||
int exists = 0;
|
||||
for(j=0;j<nclistlength(modes);j++) {
|
||||
const char* candidate = nclistget(modes,j);
|
||||
if(strcasecmp(candidate,inferences->inference)==0)
|
||||
{exists = 1; break;}
|
||||
}
|
||||
if(!exists) {
|
||||
/* append the inferred mode if not already present */
|
||||
nclistpush(modes,strdup(inferences->inference));
|
||||
inferred = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} while(inferred);
|
||||
|
||||
/* Store new mode value */
|
||||
if((newmodeval = list2string(modes))== NULL)
|
||||
{stat = NC_ENOMEM; goto done;}
|
||||
nclistset(fraglenv,pos+1,newmodeval);
|
||||
nullfree(modeval);
|
||||
modeval = NULL;
|
||||
|
||||
done:
|
||||
nclistfreeall(modes);
|
||||
return check(stat);
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
mergekey(NClist** valuesp)
|
||||
{
|
||||
@ -715,6 +793,12 @@ NC_infermodel(const char* path, int* omodep, int iscreate, int useparallel, void
|
||||
printlist(fraglenv,"processmacros");
|
||||
#endif
|
||||
|
||||
/* Phase 2a: Expand mode inferences and add to fraglenv */
|
||||
if((stat = processinferences(fraglenv))) goto done;
|
||||
#ifdef DEBUG
|
||||
printlist(fraglenv,"processinferences");
|
||||
#endif
|
||||
|
||||
/* Phase 3: coalesce duplicate fragment keys and remove duplicate values */
|
||||
if((stat = cleanfragments(&fraglenv))) goto done;
|
||||
#ifdef DEBUG
|
||||
|
@ -662,7 +662,7 @@ NCJappend(NCjson* object, NCjson* value)
|
||||
int
|
||||
NCJarrayith(NCjson* object, size_t i, NCjson** valuep)
|
||||
{
|
||||
if(object == NULL || object->sort != NCJ_DICT)
|
||||
if(object == NULL || object->sort != NCJ_ARRAY)
|
||||
return NC_EINTERNAL;
|
||||
if(valuep) *valuep = nclistget(object->contents,i);
|
||||
return NC_NOERR;
|
||||
|
@ -897,7 +897,7 @@ computeattrdata(nc_type* typeidp, NCjson* values, size_t* lenp, void** datap)
|
||||
if(typeid == NC_NAT) inferattrtype(values,&typeid);
|
||||
if(typeid == NC_NAT) {stat = NC_EBADTYPE; goto done;}
|
||||
|
||||
/* Collect the length of the attribute */
|
||||
/* Collect the length of the attribute; might be a singleton */
|
||||
switch (values->sort) {
|
||||
case NCJ_DICT: stat = NC_EINTERNAL; goto done;
|
||||
case NCJ_ARRAY:
|
||||
@ -927,7 +927,8 @@ computeattrdata(nc_type* typeidp, NCjson* values, size_t* lenp, void** datap)
|
||||
|
||||
if(lenp) *lenp = datalen;
|
||||
if(datap) {*datap = data; data = NULL;}
|
||||
|
||||
if(typeidp) *typeidp = typeid; /* return possibly inferred type */
|
||||
|
||||
done:
|
||||
nullfree(data);
|
||||
return THROW(stat);
|
||||
@ -961,9 +962,24 @@ inferattrtype(NCjson* values, nc_type* typeidp)
|
||||
case NCJ_BOOLEAN:
|
||||
typeid = NC_UBYTE;
|
||||
break;
|
||||
default: return NC_EINVAL;
|
||||
default: return NC_EINTERNAL;
|
||||
}
|
||||
break;
|
||||
/* Might be a singleton */
|
||||
case NCJ_INT:
|
||||
if(values->value[0] == '-') {
|
||||
sscanf(values->value,"%lld",&i64);
|
||||
u64 = (unsigned long long)i64;
|
||||
} else
|
||||
sscanf(values->value,"%llu",&u64);
|
||||
typeid = mininttype(u64);
|
||||
break;
|
||||
case NCJ_DOUBLE:
|
||||
typeid = NC_DOUBLE;
|
||||
break;
|
||||
case NCJ_BOOLEAN:
|
||||
typeid = NC_UBYTE;
|
||||
break;
|
||||
case NCJ_STRING: /* requires special handling as an array of characters */
|
||||
typeid = NC_CHAR;
|
||||
break;
|
||||
|
@ -42,8 +42,8 @@ int main(int argc, char** argv)
|
||||
err = nc_set_fill(ncid, NC_NOFILL, NULL); CHECK_ERR
|
||||
|
||||
err = nc_def_dim(ncid, "X", 5, &dimid[0]); CHECK_ERR
|
||||
err = nc_def_dim(ncid, "YY", 32000, &dimid[1]); CHECK_ERR
|
||||
err = nc_def_dim(ncid, "XX", 32000, &dimid[2]); CHECK_ERR
|
||||
err = nc_def_dim(ncid, "YY", 320, &dimid[1]); CHECK_ERR
|
||||
err = nc_def_dim(ncid, "XX", 320, &dimid[2]); CHECK_ERR
|
||||
|
||||
err = nc_def_var(ncid, "var", NC_INT, 1, dimid, &varid); CHECK_ERR
|
||||
err = nc_def_var(ncid, "var_big", NC_FLOAT, 2, dimid+1, &varid); CHECK_ERR
|
||||
|
@ -83,6 +83,7 @@ IF(ENABLE_TESTS)
|
||||
ENDIF()
|
||||
add_sh_test(nczarr_test run_ncgen4)
|
||||
add_sh_test(nczarr_test run_purezarr)
|
||||
add_sh_test(nczarr_test run_interop)
|
||||
|
||||
BUILD_BIN_TEST(tst_chunkcases ${TSTCOMMONSRC})
|
||||
TARGET_INCLUDE_DIRECTORIES(tst_chunkcases PUBLIC ../libnczarr)
|
||||
|
@ -51,6 +51,7 @@ endif
|
||||
|
||||
TESTS += run_ncgen4.sh
|
||||
TESTS += run_purezarr.sh
|
||||
TESTS += run_interop.sh
|
||||
|
||||
check_PROGRAMS += tst_chunkcases
|
||||
tst_chunkcases_SOURCES = tst_chunkcases.c ${tstcommonsrc}
|
||||
@ -98,7 +99,7 @@ ncdumpchunks_SOURCES = ncdumpchunks.c
|
||||
EXTRA_DIST = CMakeLists.txt \
|
||||
run_ut_map.sh run_ut_mapapi.sh run_ut_misc.sh run_ut_chunk.sh run_ncgen4.sh \
|
||||
run_nccopyz.sh run_fillonlyz.sh run_chunkcases.sh test_nczarr.sh run_perf_chunks1.sh run_s3_cleanup.sh \
|
||||
run_purezarr.sh \
|
||||
run_purezarr.sh run_interop.sh \
|
||||
ref_ut_map_create.cdl ref_ut_map_writedata.cdl ref_ut_map_writemeta2.cdl ref_ut_map_writemeta.cdl \
|
||||
ref_ut_map_readmeta.txt ref_ut_map_readmeta2.txt ref_ut_map_search.txt \
|
||||
ref_ut_mapapi_create.cdl ref_ut_mapapi_data.cdl ref_ut_mapapi_meta.cdl ref_ut_mapapi_search.txt \
|
||||
@ -113,8 +114,12 @@ ref_misc1.cdl ref_misc1.dmp \
|
||||
ref_avail1.cdl ref_avail1.dmp ref_avail1.txt \
|
||||
ref_xarray.cdl ref_purezarr.cdl ref_purezarr_base.cdl
|
||||
|
||||
# Interoperability files
|
||||
EXTRA_DIST += power_901_constants.zip ref_power_901_constants.cdl
|
||||
|
||||
CLEANFILES = ut_*.txt ut*.cdl tmp*.nc tmp*.cdl tmp*.txt tmp*.dmp tmp*.zip tmp*.nc
|
||||
|
||||
# Remove directories
|
||||
clean-local:
|
||||
rm -fr tmp*.file results.file results.s3 results.zip
|
||||
rm -fr power_901_constants.file
|
||||
|
BIN
nczarr_test/power_901_constants.zip
Normal file
BIN
nczarr_test/power_901_constants.zip
Normal file
Binary file not shown.
38
nczarr_test/ref_power_901_constants.cdl
Normal file
38
nczarr_test/ref_power_901_constants.cdl
Normal file
@ -0,0 +1,38 @@
|
||||
netcdf power_901_constants {
|
||||
dimensions:
|
||||
lat = 361 ;
|
||||
lon = 576 ;
|
||||
variables:
|
||||
float FROCEAN(lat, lon) ;
|
||||
FROCEAN:fmissing_value = 999999986991104. ;
|
||||
FROCEAN:long_name = "fraction_of_ocean" ;
|
||||
FROCEAN:standard_name = "fraction_of_ocean" ;
|
||||
FROCEAN:units = "1" ;
|
||||
FROCEAN:valid_range = -999999986991104., 999999986991104. ;
|
||||
FROCEAN:vmax = 999999986991104. ;
|
||||
FROCEAN:vmin = -999999986991104. ;
|
||||
float FRLAND(lat, lon) ;
|
||||
FRLAND:fmissing_value = 999999986991104. ;
|
||||
FRLAND:long_name = "fraction_of_land" ;
|
||||
FRLAND:standard_name = "fraction_of_land" ;
|
||||
FRLAND:units = "1" ;
|
||||
FRLAND:valid_range = -999999986991104., 999999986991104. ;
|
||||
FRLAND:vmax = 999999986991104. ;
|
||||
FRLAND:vmin = -999999986991104. ;
|
||||
float FRLAKE(lat, lon) ;
|
||||
FRLAKE:fmissing_value = 999999986991104. ;
|
||||
FRLAKE:long_name = "fraction_of_lake" ;
|
||||
FRLAKE:standard_name = "fraction_of_lake" ;
|
||||
FRLAKE:units = "1" ;
|
||||
FRLAKE:valid_range = -999999986991104., 999999986991104. ;
|
||||
FRLAKE:vmax = 999999986991104. ;
|
||||
FRLAKE:vmin = -999999986991104. ;
|
||||
float FRLANDICE(lat, lon) ;
|
||||
FRLANDICE:fmissing_value = 999999986991104. ;
|
||||
FRLANDICE:long_name = "fraction_of_land_ice" ;
|
||||
FRLANDICE:standard_name = "fraction_of_land_ice" ;
|
||||
FRLANDICE:units = "1" ;
|
||||
FRLANDICE:valid_range = -999999986991104., 999999986991104. ;
|
||||
FRLANDICE:vmax = 999999986991104. ;
|
||||
FRLANDICE:vmin = -999999986991104. ;
|
||||
}
|
57
nczarr_test/run_interop.sh
Executable file
57
nczarr_test/run_interop.sh
Executable file
@ -0,0 +1,57 @@
|
||||
#!/bin/sh
|
||||
|
||||
if test "x$srcdir" = x ; then srcdir=`pwd`; fi
|
||||
. ../test_common.sh
|
||||
|
||||
. "$srcdir/test_nczarr.sh"
|
||||
|
||||
# This shell script tests compatibility between
|
||||
# this implementation and other implementations
|
||||
# by means of files constructed by that other implementation
|
||||
|
||||
set -e
|
||||
|
||||
# Dump a directory-format ("file") dataset with ncdump and compare the
# output against the reference CDL.
#   $1 - dataset base name; the reference is ${srcdir}/ref_$1.cdl
#   $2 - mode value placed ahead of the storage kind in the URL fragment
#   $3 - optional; "metaonly" makes ncdump print only the header (-h)
testcasefile() {
  zext=file
  ref=$1
  mode=$2
  # Shell variables are global: reset flags so that a "-h" left by an
  # earlier call (or inherited from the environment) cannot leak in.
  flags=""
  if test "x$3" = xmetaonly ; then flags="-h"; fi
  fileargs ${srcdir}/$ref "mode=$mode,$zext"
  rm -f tmp_${ref}_${zext}.cdl
  ${NCDUMP} $flags $fileurl > tmp_${ref}_${zext}.cdl
  diff -b ${srcdir}/ref_${ref}.cdl tmp_${ref}_${zext}.cdl
}
|
||||
|
||||
# Dump a zip-format dataset with ncdump and compare the output against
# the reference CDL.
#   $1 - dataset base name; the reference is ${srcdir}/ref_$1.cdl
#   $2 - mode value placed ahead of the storage kind in the URL fragment
#   $3 - optional; "metaonly" makes ncdump print only the header (-h)
testcasezip() {
  zext=zip
  ref=$1
  mode=$2
  # Shell variables are global: reset flags so that a "-h" left by an
  # earlier call (or inherited from the environment) cannot leak in.
  flags=""
  if test "x$3" = xmetaonly ; then flags="-h"; fi
  fileargs ${srcdir}/$ref "mode=$mode,$zext"
  rm -f tmp_${ref}_${zext}.cdl
  ${NCDUMP} $flags $fileurl > tmp_${ref}_${zext}.cdl
  diff -b ${srcdir}/ref_${ref}.cdl tmp_${ref}_${zext}.cdl
}
|
||||
|
||||
# Run all interoperability cases for one storage kind.
#   $1 - storage kind: "file" or "zip"; anything else aborts the script.
testallcases() {
  zext=$1
  if test "x$zext" = xfile ; then
    # The dataset ships zipped: unpack it and give the tree a .file suffix
    rm -fr power_901_constants power_901_constants.file
    unzip ${srcdir}/power_901_constants.zip > /dev/null
    mv power_901_constants power_901_constants.file
    testcasefile power_901_constants xarray metaonly
  elif test "x$zext" = xzip ; then
    testcasezip power_901_constants xarray metaonly
  else
    echo "unimplemented kind: $1"
    exit 1
  fi
}
|
||||
|
||||
#testallcases file
|
||||
if test "x$FEATURE_NCZARR_ZIP" = xyes ; then testallcases zip; fi
|
||||
#No examples yet: if test "x$FEATURE_S3TESTS" = xyes ; then testallcases s3; fi
|
||||
|
||||
exit 0
|
@ -15,19 +15,18 @@ testcase() {
|
||||
zext=$1
|
||||
|
||||
echo "*** Test: pure zarr write; format=$zext"
|
||||
fileargs tmp_purezarr "zarr&mode=$zext"
|
||||
fileargs tmp_purezarr "mode=zarr,$zext"
|
||||
deletemap $zext $file
|
||||
${NCGEN} -4 -b -o "$fileurl" $srcdir/ref_purezarr_base.cdl
|
||||
${NCDUMP} $fileurl > tmp_purezarr_${zext}.cdl
|
||||
diff -b ${srcdir}/ref_purezarr.cdl tmp_purezarr_${zext}.cdl
|
||||
|
||||
echo "*** Test: xarray zarr write; format=$zext"
|
||||
fileargs tmp_xarray "xarray&mode=$zext"
|
||||
fileargs tmp_xarray "mode=xarray,$zext"
|
||||
deletemap $zext $file
|
||||
${NCGEN} -4 -b -o "$fileurl" $srcdir/ref_purezarr_base.cdl
|
||||
${NCDUMP} $fileurl > tmp_xarray_${zext}.cdl
|
||||
diff -b ${srcdir}/ref_xarray.cdl tmp_xarray_${zext}.cdl
|
||||
|
||||
}
|
||||
|
||||
testcase file
|
||||
|
Loading…
Reference in New Issue
Block a user