Merge pull request #2658 from DennisHeimbigner/znotnc.dmh

Check at nc_open if file appears to be in NCZarr/Zarr format.
This commit is contained in:
Ward Fisher 2023-04-12 21:44:53 -06:00 committed by GitHub
commit b30b4e87cf
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 226 additions and 38 deletions

View File

@ -54,7 +54,7 @@ jobs:
- name: Check for plugins
run: |
dir ./plugins
dir ./plugins/.libs
if test -e ./plugins/.libs ; then dir ./plugins/.libs ; fi
- name: (Autotools) Build and Run Tests
run: make check -j 8 LDFLAGS="-Wl,--export-all-symbols"

View File

@ -1069,7 +1069,7 @@ IF(ENABLE_DAP)
ELSE()
MESSAGE(STATUS "Disabling DAP4")
SET(ENABLE_DAP4 OFF CACHE BOOL "")
ENDIF(NOT ENABLE_HDF5)
ENDIF(ENABLE_HDF5)
ELSE()
SET(ENABLE_DAP2 OFF CACHE BOOL "")

View File

@ -7,6 +7,8 @@ This file contains a high-level description of this package's evolution. Release
## 4.9.3 - TBD
* [Bug Fix] Add a crude test to see if an NCZarr path looks like a valid NCZarr/Zarr file. See [Github #2658](https://github.com/Unidata/netcdf-c/pull/2658).
## 4.9.2 - March 14, 2023
This is the maintenance release which adds support for HDF5 version 1.14.0, in addition to a handful of other changes and bugfixes.
@ -16,6 +18,7 @@ This is the maintenance release which adds support for HDF5 version 1.14.0, in a
* Fix 'make distcheck' error in run_interop.sh. See [Github #2631](https://github.com/Unidata/netcdf-c/pull/2631).
* Update `nc-config` to remove inclusion from automatically-detected `nf-config` and `ncxx-config` files, as the wrong files could be included in the output. This is in support of [GitHub #2274](https://github.com/Unidata/netcdf-c/issues/2274).
* Update H5FDhttp.[ch] to work with HDF5 version 1.13.2 and later. See [Github #2635](https://github.com/Unidata/netcdf-c/pull/2635).
* Fix 'make distcheck' error in run_interop.sh. See [Github #2631](https://github.com/Unidata/netcdf-c/pull/2631).
* [Bug Fix] Update DAP code to enable CURLOPT_ACCEPT_ENCODING by default. See [Github #2630](https://github.com/Unidata/netcdf-c/pull/2630).
* [Bug Fix] Fix byterange failures for certain URLs. See [Github #2649](https://github.com/Unidata/netcdf-c/pull/2649).
* [Bug Fix] Fix 'make distcheck' error in run_interop.sh. See [Github #2631](https://github.com/Unidata/netcdf-c/pull/2631).

View File

@ -142,6 +142,7 @@ ncz_open_dataset(NC_FILE_INFO_T* file, const char** controls)
if((stat = nczmap_open(zinfo->controls.mapimpl,nc->path,mode,zinfo->controls.flags,NULL,&zinfo->map)))
goto done;
/* Ok, try to read superblock */
if((stat = ncz_read_superblock(file,&nczarr_version,&zarr_format))) goto done;
if(nczarr_version == NULL) /* default */

View File

@ -41,6 +41,8 @@ static int computedimrefs(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, int purezarr
static int json_convention_read(NCjson* jdict, NCjson** jtextp);
static int jtypes2atypes(NCjson* jtypes, NClist* atypes);
static int ncz_validate(NC_FILE_INFO_T* file);
/**************************************************/
/**************************************************/
/* Synchronize functions to make map and memory
@ -1829,7 +1831,7 @@ ncz_read_superblock(NC_FILE_INFO_T* file, char** nczarrvp, char** zarrfp)
break;
default: goto done;
}
/* Also gett Zarr Root Group */
/* Get Zarr Root Group, if any */
switch(stat = NCZ_downloadjson(zinfo->map, ZMETAROOT, &jzgroup)) {
case NC_NOERR:
break;
@ -1842,8 +1844,9 @@ ncz_read_superblock(NC_FILE_INFO_T* file, char** nczarrvp, char** zarrfp)
if(jzgroup != NULL) {
/* See if this NCZarr V2 */
if((stat = NCJdictget(jzgroup,NCZ_V2_SUPERBLOCK,&jsuper))) goto done;
if(!stat && jsuper == NULL)
{if((stat = NCJdictget(jzgroup,NCZ_V2_SUPERBLOCK_UC,&jsuper))) goto done;}
if(!stat && jsuper == NULL) { /* try uppercase name */
if((stat = NCJdictget(jzgroup,NCZ_V2_SUPERBLOCK_UC,&jsuper))) goto done;
}
if(jsuper != NULL) {
/* Extract the equivalent attribute */
if(jsuper->sort != NCJ_DICT)
@ -1855,15 +1858,21 @@ ncz_read_superblock(NC_FILE_INFO_T* file, char** nczarrvp, char** zarrfp)
if((stat = NCJdictget(jzgroup,"zarr_format",&jtmp))) goto done;
zarr_format = nulldup(NCJstring(jtmp));
}
/* Set the controls */
/* Set the format flags */
if(jnczgroup == NULL && jsuper == NULL) {
zinfo->controls.flags |= FLAG_PUREZARR;
/* See if this looks like an NCZarr/Zarr dataset at all
by looking for anything here of the form ".z*" */
if((stat = ncz_validate(file))) goto done;
/* ok, assume pure zarr with no groups */
zinfo->controls.flags |= FLAG_PUREZARR;
zinfo->controls.flags &= ~(FLAG_NCZARR_V1);
if(zarr_format == NULL) zarr_format = strdup("2");
} else if(jnczgroup != NULL) {
zinfo->controls.flags |= FLAG_NCZARR_V1;
/* Also means file is read only */
file->no_write = 1;
} else if(jsuper != NULL) {
/* ! FLAG_NCZARR_V1 && ! FLAG_PUREZARR */
/* ! FLAG_NCZARR_V1 && ! FLAG_PUREZARR */
}
if(nczarrvp) {*nczarrvp = nczarr_version; nczarr_version = NULL;}
if(zarrfp) {*zarrfp = zarr_format; zarr_format = NULL;}
@ -2411,3 +2420,63 @@ jtypes2atypes(NCjson* jtypes, NClist* atypes)
done:
return stat;
}
/**
 * See if there is reason to believe the specified path is a legitimate (NC)Zarr file.
 * Do a breadth first walk of the key tree starting at the root key "/" and
 * succeed as soon as any segment is found whose name begins with ".z" or ".ncz".
 *
 * @param file to validate
 * @return NC_NOERR if it looks ok
 * @return NC_ENOTNC if it does not look ok; this is also returned when a
 *         map search fails, since the walk then cannot confirm anything.
 */
static int
ncz_validate(NC_FILE_INFO_T* file)
{
    int stat = NC_NOERR;
    NCZ_FILE_INFO_T* zinfo = (NCZ_FILE_INFO_T*)file->format_file_info;
    int validate = 0;
    NCbytes* prefix = ncbytesnew();
    NClist* queue = nclistnew();
    NClist* nextlevel = nclistnew();
    NCZMAP* map = zinfo->map;
    char* path = NULL;
    char* segment = NULL;
    size_t seglen;

    ZTRACE(3,"file=%s",file->controller->path);

    /* Seed the work queue with the root key */
    path = strdup("/");
    nclistpush(queue,path);
    path = NULL;
    do {
        /* This should be full path key */
        nullfree(path); path = NULL;
        path = nclistremove(queue,0); /* remove from front of queue */
        /* Get list of next level segments (partial keys).
           nextlevel is always empty here: it starts empty and the inner
           loop below drains it completely before the next iteration. */
        if((stat=nczmap_search(map,path,nextlevel))) {validate = 0; goto done;}
        /* For each s in next level, test, convert to full path, and push onto queue.
           Drain by repeatedly removing the front element; an index-based loop
           combined with removal would skip every other segment. */
        while(nclistlength(nextlevel) > 0) {
            nullfree(segment); segment = NULL;
            segment = nclistremove(nextlevel,0);
            seglen = nulllen(segment);
            if((seglen >= 2 && memcmp(segment,".z",2)==0) || (seglen >= 4 && memcmp(segment,".ncz",4)==0)) {
                validate = 1;
                goto done;
            }
            /* Convert to full path; the root "/" already ends with the separator */
            ncbytesclear(prefix);
            ncbytescat(prefix,path);
            if(strlen(path) > 1) ncbytescat(prefix,"/");
            ncbytescat(prefix,segment);
            /* push onto queue */
            nclistpush(queue,ncbytesextract(prefix));
        }
    } while(nclistlength(queue) > 0);
done:
    /* Whatever the reason for stopping, no marker found means "not a netCDF/Zarr file" */
    if(!validate) stat = NC_ENOTNC;
    nullfree(path);
    nullfree(segment);
    nclistfreeall(queue);
    nclistfreeall(nextlevel);
    ncbytesfree(prefix);
    return ZUNTRACE(THROW(stat));
}

View File

@ -120,6 +120,9 @@ IF(ENABLE_TESTS)
BUILD_BIN_TEST(test_quantize ${TSTCOMMONSRC})
add_sh_test(nczarr_test run_quantize)
BUILD_BIN_TEST(tst_notzarr ${TSTCOMMONSRC})
add_sh_test(nczarr_test run_notzarr)
if(ENABLE_S3)
add_sh_test(nczarr_test run_s3_cleanup)
ENDIF()

View File

@ -35,7 +35,7 @@ ut_projections_SOURCES = ut_projections.c ${commonsrc}
ut_chunking_SOURCES = ut_chunking.c ${commonsrc}
tst_fillonlyz_SOURCES = tst_fillonlyz.c ${tstcommonsrc}
check_PROGRAMS += tst_zchunks tst_zchunks2 tst_zchunks3 tst_fillonlyz test_quantize
check_PROGRAMS += tst_zchunks tst_zchunks2 tst_zchunks3 tst_fillonlyz test_quantize tst_notzarr
TESTS += run_ut_chunk.sh
@ -64,7 +64,9 @@ TESTS += run_jsonconvention.sh
TESTS += run_strings.sh
TESTS += run_scalar.sh
TESTS += run_nulls.sh
endif
TESTS += run_notzarr.sh
endif #BUILD_UTILITIES
if BUILD_UTILITIES
@ -139,7 +141,7 @@ run_purezarr.sh run_interop.sh run_misc.sh \
run_filter.sh \
run_newformat.sh run_nczarr_fill.sh run_quantize.sh \
run_jsonconvention.sh run_nczfilter.sh run_unknown.sh \
run_scalar.sh run_strings.sh run_nulls.sh
run_scalar.sh run_strings.sh run_nulls.sh run_notzarr.sh
EXTRA_DIST += \
ref_ut_map_create.cdl ref_ut_map_writedata.cdl ref_ut_map_writemeta2.cdl ref_ut_map_writemeta.cdl \

View File

@ -1,4 +1,4 @@
[0] /.zattrs : (354) |{"globalfloat": 1, "globalfloatvec": [1,2], "globalchar": "abc", "globalillegal": "[ [ 1.0, 0.0, 0.0 ], [ 0.0, 1.0, 0.0 ], [ 0.0, 0.0, 1.0 ", "_NCProperties": "version=2,netcdf=4.9.3-development,nczarr=2.0.0", "_nczarr_attr": {"types": {"globalfloat": "<f8", "globalfloatvec": "<f8", "globalchar": ">S1", "globalillegal": ">S1", "_NCProperties": ">S1"}}}|
[0] /.zattrs : (354) |{"globalfloat": 1, "globalfloatvec": [1,2], "globalchar": "abc", "globalillegal": "[ [ 1.0, 0.0, 0.0 ], [ 0.0, 1.0, 0.0 ], [ 0.0, 0.0, 1.0 ", "_nczarr_attr": {"types": {"globalfloat": "<f8", "globalfloatvec": "<f8", "globalchar": ">S1", "globalillegal": ">S1", "_NCProperties": ">S1"}}}|
[1] /.zgroup : (129) |{"zarr_format": 2, "_nczarr_superblock": {"version": "2.0.0"}, "_nczarr_group": {"dims": {"d1": 1}, "vars": ["v"], "groups": []}}|
[3] /v/.zarray : (202) |{"zarr_format": 2, "shape": [1], "dtype": "<i4", "chunks": [1], "fill_value": -2147483647, "order": "C", "compressor": null, "filters": null, "_nczarr_array": {"dimrefs": ["/d1"], "storage": "chunked"}}|
[4] /v/.zattrs : (296) |{"varjson1": {"key1": [1,2,3], "key2": {"key3": "abc"}}, "varjson2": [[1.0,0.0,0.0],[0.0,1.0,0.0],[0.0,0.0,1.0]], "varvec1": "1.0, 0.0, 0.0", "varvec2": [0.,0.,1.], "_ARRAY_DIMENSIONS": ["d1"], "_nczarr_attr": {"types": {"varjson1": ">S1", "varjson2": ">S1", "varvec1": ">S1", "varvec2": ">S1"}}}|

View File

@ -17,14 +17,17 @@ echo "*** Test: write then read using json convention"
fileargs tmp_jsonconvention "mode=nczarr,$zext"
deletemap $zext $file
${NCGEN} -4 -b -o "$fileurl" $srcdir/ref_jsonconvention.cdl
# Clean up extraneous changes wrt _NCProperties
${ZMD} -h $fileurl | sed -e 's/,key1=value1|key2=value2//' -e '/"_NCProperties"/ s/(378)/(354)/' > tmp_jsonconvention_${zext}.txt
${NCDUMP} $fileurl > tmp_jsonconvention_${zext}.cdl
${ZMD} -h $fileurl > tmp_jsonconvention_${zext}.txt
# | sed -e 's/,key1=value1|key2=value2//' -e '/"_NCProperties"/ s/(378)/(354)/'
# Clean up extraneous changes so comparisons work
# remove '\n' from ref file before comparing
rm -f tmp_jsonconvention.cdl
sed -e 's|\\n||g' < ${srcdir}/ref_jsonconvention.cdl > tmp_jsonconvention.cdl
diff -b tmp_jsonconvention.cdl tmp_jsonconvention_${zext}.cdl
diff -b ${srcdir}/ref_jsonconvention.zmap tmp_jsonconvention_${zext}.txt
sed -e 's|\\n||g' < ${srcdir}/ref_jsonconvention.cdl > tmp_jsonconvention_clean.cdl
sed -e 's|\\n||g' < ${srcdir}/ref_jsonconvention.zmap > tmp_jsonconvention_clean.zmap
cat < tmp_jsonconvention_${zext}.cdl > tmp_jsonconvention_clean_${zext}.cdl
sed -e 's|"_NCProperties": "version=2,netcdf=[^,]*,nczarr=2.0.0",||' < tmp_jsonconvention_${zext}.txt > tmp_jsonconvention_clean_${zext}.txt
diff -b tmp_jsonconvention_clean.cdl tmp_jsonconvention_clean_${zext}.cdl
diff -b tmp_jsonconvention_clean.zmap tmp_jsonconvention_clean_${zext}.txt
}
testcase file

82
nczarr_test/run_notzarr.sh Executable file
View File

@ -0,0 +1,82 @@
#!/bin/sh
if test "x$srcdir" = x ; then srcdir=`pwd`; fi
. ../test_common.sh
. "$srcdir/test_nczarr.sh"
# Test ability to detect NCZarr/Zarr files
URL="${NCZARR_S3_TEST_HOST}/${NCZARR_S3_TEST_BUCKET}"
KEY="/netcdf-c"
THISDIR=`pwd`
RESULTSDIR=tmp_notzarr
sometestfailed=
# Record a failure unless the status passed as $1 is exactly -51,
# the value every case in this script is expected to produce.
testfailed() {
    case "$1" in
    -51)
        ;;
    *)
        echo "*** Failed"
        sometestfailed=1
        ;;
    esac
}
# Start from a fresh scratch directory and run everything from inside it
rm -fr ${RESULTSDIR}
mkdir -p ${RESULTSDIR}
cd ${RESULTSDIR}
# Make test sets
mkdir empty.file # empty
mkdir notzarr.file # non-empty, non-zarr
echo "random data" >notzarr.file/notzarr.txt
# Zip versions of the same two test sets (only when FEATURE_NCZARR_ZIP is yes)
if test "x$FEATURE_NCZARR_ZIP" = xyes ; then
mkdir empty
zip -r empty.zip empty
cp -r notzarr.file ./notzarr
zip -r notzarr.zip notzarr
rm -fr empty notzarr
fi
# S3 version of the non-zarr test set (only when FEATURE_S3TESTS is yes)
if test "x$FEATURE_S3TESTS" = xyes ; then
cat /dev/null > empty.txt
# not possible: ${execdir}/s3util -f notzarr.txt -u "https://${URL}" -k "/netcdf-c/empty.s3" upload
${execdir}/s3util -f notzarr.file/notzarr.txt -u "https://${URL}" -k "/netcdf-c/notzarr.s3/notzarr.txt" upload
fi
# Each case below captures what tst_notzarr writes to stdout and passes it to testfailed
echo "Test empty file"
RET=`${execdir}/tst_notzarr "file://empty.file#mode=zarr,file"`
testfailed "$RET"
echo "Test non-zarr file"
RET=`${execdir}/tst_notzarr "file://notzarr.file#mode=zarr,file"`
testfailed "$RET"
if test "x$FEATURE_NCZARR_ZIP" = xyes ; then
echo "Test empty zip file"
RET=`${execdir}/tst_notzarr "file://empty.zip#mode=zarr,zip"`
testfailed "$RET"
echo "Test non-zarr zip file"
RET=`${execdir}/tst_notzarr "file://notzarr.zip#mode=zarr,zip"`
testfailed "$RET"
fi
if test "x$FEATURE_S3TESTS" = xyes ; then
# Deliberately disabled: the condition below is never true
if test 1 = 0 ; then
# This test is NA for S3
echo "Test empty S3 file"
KEY="/netcdf-c/empty.s3"
RET=`${execdir}/tst_notzarr "https://$URL${KEY}#mode=zarr,s3"`
testfailed "$RET"
fi
echo "Test non-zarr S3 file"
RET=`${execdir}/tst_notzarr "https://$URL/netcdf-c/notzarr.s3#mode=zarr,s3"`
testfailed "$RET"
fi
# Return to the starting directory before removing the scratch directory
cd ${THISDIR}
# Cleanup
rm -fr ${RESULTSDIR}
if test "x$FEATURE_S3TESTS" = xyes ; then
    awsdelete "/netcdf-c"
fi
# Propagate the outcome: testfailed sets sometestfailed=1 whenever a case
# did not produce the expected status, so report that to the test harness
# instead of unconditionally succeeding.
if test "x$sometestfailed" = x1 ; then
    exit 1
fi
exit 0

31
nczarr_test/tst_notzarr.c Normal file
View File

@ -0,0 +1,31 @@
/* This is part of the netCDF package.
Copyright 2018 University Corporation for Atmospheric Research/Unidata
See COPYRIGHT file for conditions of use.
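Test detection of paths that are not NCZarr/Zarr datasets:
open the URL given on the command line and print the nc_open status.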
Author: Dennis Heimbigner
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "netcdf.h"
#define ERR(r) {fprintf(stderr,"fail: line %d: (%d) %s\n",__LINE__,(r),nc_strerror((r)));}
/**
 * Try to open the dataset named by the first command line argument and
 * write the numeric nc_open status to stdout (no trailing newline) so the
 * calling script can inspect it.
 *
 * @param argc argument count; at least one argument (the URL) is required
 * @param argv argv[1] is the URL/path handed to nc_open
 * @return 1 if the URL argument is missing, otherwise 0 regardless of
 *         whether the open succeeded
 */
int
main(int argc, char **argv)
{
    int ncid = 0;
    int status;

    if (argc < 2) {
        fprintf(stderr,"Usage: tst_notzarr <url>\n");
        return 1;
    }

    status = nc_open(argv[1], NC_NETCDF4, &ncid);
    printf("%d", status);

    /* Only a successfully opened dataset has anything to close */
    if (status == NC_NOERR) {
        nc_close(ncid);
    }
    return 0;
}

View File

@ -1,5 +1,5 @@
/*
x * Copyright 2018, University Corporation for Atmospheric Research
* Copyright 2018, University Corporation for Atmospheric Research
* See netcdf/COPYRIGHT file for copying and redistribution conditions.
*/

View File

@ -24,23 +24,21 @@ endif !ISMINGW
# Create an alternate directory if not installing or for noinst installs.
ALTPLUGINDIR = ${abs_top_builddir}/plugins/plugindir
RPATH = -rpath $(abs_builddir)/.libs
# This is where the plugins are to be installed
if ENABLE_PLUGIN_DIR
plugindir = @PLUGIN_INSTALL_DIR@
plugindir = @PLUGIN_INSTALL_DIR@
else
plugindir = ${ALTPLUGINDIR}
AM_LDFLAGS += -rpath $(abs_builddir)/.libs
plugindir = ${ALTPLUGINDIR}
AM_LDFLAGS += ${RPATH}
endif
plugin_LTLIBRARIES =
plugins_to_install =
# Apparently one cannot have plugin_LTLIBRARIES and also noinst_LTLIBRARIES.
# So create a tmp location for "noinst" shared libraries.
# tmpdir = ${ALTPLUGINDIR}
# These libraries are for testing only
check_LTLIBRARIES =
noinst_LTLIBRARIES =
if ISMINGW
LDADD = ${top_builddir}/liblib/libnetcdf.la
@ -107,22 +105,22 @@ endif #ENABLE_PLUGINS
# Need two distinct instances
lib__nch5noop_la_SOURCES = H5Znoop.c H5Zutil.c h5noop.h
lib__nch5noop1_la_SOURCES = H5Znoop1.c H5Zutil.c h5noop.h
lib__nch5noop_la_LDFLAGS = $(AM_LDFLAGS) -rpath $(abs_builddir)/.libs
lib__nch5noop1_la_LDFLAGS = $(AM_LDFLAGS) -rpath $(abs_builddir)/.libs
lib__nch5noop_la_LDFLAGS = $(AM_LDFLAGS) ${RPATH}
lib__nch5noop1_la_LDFLAGS = $(AM_LDFLAGS) ${RPATH}
# The misc filter is to allow testing of filter arguments
lib__nch5misc_la_SOURCES = H5Zmisc.c H5Zutil.c h5misc.h
lib__nch5misc_la_LDFLAGS = $(AM_LDFLAGS) -rpath $(abs_builddir)/.libs
lib__nch5misc_la_LDFLAGS = $(AM_LDFLAGS) ${RPATH}
lib__nczmisc_la_SOURCES = NCZmisc.c
lib__nczmisc_la_LDFLAGS = $(AM_LDFLAGS) -rpath $(abs_builddir)/.libs
lib__nczmisc_la_LDFLAGS = $(AM_LDFLAGS) ${RPATH}
# Provide a filter to test missing filter
lib__nch5unknown_la_SOURCES = H5Zunknown.c
lib__nch5unknown_la_LDFLAGS = $(AM_LDFLAGS) -rpath $(abs_builddir)/.libs
lib__nch5unknown_la_LDFLAGS = $(AM_LDFLAGS) ${RPATH}
check_LTLIBRARIES += lib__nch5noop.la lib__nch5noop1.la lib__nch5unknown.la
# findplugin.sh needs these plugins, and I want to see if these get built properly
noinst_LTLIBRARIES += lib__nch5misc.la lib__nczmisc.la
check_LTLIBRARIES += lib__nch5misc.la lib__nczmisc.la
# Bzip2 is used to test more complex filters
lib__nch5bzip2_la_SOURCES = H5Zbzip2.c h5bzip2.h
@ -138,7 +136,7 @@ endif #ENABLE_FILTER_TESTING
if ENABLE_PLUGIN_DIR
plugin_LTLIBRARIES += $(plugins_to_install)
else
noinst_LTLIBRARIES += $(plugins_to_install)
check_LTLIBRARIES += $(plugins_to_install)
endif
BUILT_SOURCES = H5Znoop1.c
@ -157,7 +155,3 @@ bzip2::
tar -zxf ${BZIP2DIR}.tar.gz
cd ${BZIP2DIR}; cp ${BZIP2SRC} ..; cp LICENSE ../BZIP2_LICENSE ; cd ..
rm -fr ./${BZIP2DIR}
# Custom clean
clean-local:
rm -fr ${ALTPLUGINDIR}