diff --git a/.github/workflows/run_tests_osx.yml b/.github/workflows/run_tests_osx.yml index 791988779..cf87b35bb 100644 --- a/.github/workflows/run_tests_osx.yml +++ b/.github/workflows/run_tests_osx.yml @@ -7,7 +7,7 @@ name: Run macOS-based netCDF Tests -on: [pull_request, workflow_dispatch] +on: [push,pull_request, workflow_dispatch] jobs: diff --git a/.github/workflows/run_tests_ubuntu.yml b/.github/workflows/run_tests_ubuntu.yml index 5f18aa1e0..a6d25dec0 100644 --- a/.github/workflows/run_tests_ubuntu.yml +++ b/.github/workflows/run_tests_ubuntu.yml @@ -4,7 +4,7 @@ name: Run Ubuntu/Linux netCDF Tests -on: [pull_request, workflow_dispatch] +on: [push,pull_request, workflow_dispatch] jobs: @@ -42,7 +42,7 @@ jobs: wget https://support.hdfgroup.org/ftp/HDF/releases/HDF4.2.15/src/hdf-4.2.15.tar.bz2 tar -jxf hdf-4.2.15.tar.bz2 pushd hdf-4.2.15 - ./configure --prefix=${HOME}/environments/${{ matrix.hdf5 }} --disable-static --enable-shared --disable-fortran --disable-netcdf --with-szlib + ./configure --prefix=${HOME}/environments/${{ matrix.hdf5 }} --disable-static --enable-shared --disable-fortran --disable-netcdf --with-szlib make -j make install -j popd @@ -164,7 +164,7 @@ jobs: - name: Configure shell: bash -l {0} - run: CFLAGS=${CFLAGS} LDFLAGS=${LDFLAGS} LD_LIBRARY_PATH=${LD_LIBRARY_PATH} ./configure --enable-hdf4 --enable-hdf5 --enable-dap --disable-dap-remote-tests --enable-doxygen + run: CFLAGS=${CFLAGS} LDFLAGS=${LDFLAGS} LD_LIBRARY_PATH=${LD_LIBRARY_PATH} ./configure --enable-hdf4 --enable-hdf5 --enable-dap --disable-dap-remote-tests --enable-doxygen --enable-external-server-tests if: ${{ success() }} - name: Look at config.log if error diff --git a/.github/workflows/run_tests_win_mingw.yml b/.github/workflows/run_tests_win_mingw.yml index 46771aae0..773819c02 100644 --- a/.github/workflows/run_tests_win_mingw.yml +++ b/.github/workflows/run_tests_win_mingw.yml @@ -7,7 +7,7 @@ name: Run MSYS2, MinGW64-based Tests -on: [pull_request, workflow_dispatch] +on: 
[push,pull_request, workflow_dispatch] jobs: diff --git a/CMakeLists.txt b/CMakeLists.txt index 2153f22d9..2db8dea02 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1078,17 +1078,25 @@ OPTION(ENABLE_BYTERANGE "Enable byte-range access to remote datasets.." OFF) # Check for the math library so it can be explicitly linked. IF(NOT WIN32) FIND_LIBRARY(HAVE_LIBM NAMES math m libm) - MESSAGE(STATUS "Found Math library: ${HAVE_LIBM}") IF(NOT HAVE_LIBM) - MESSAGE(FATAL_ERROR "Unable to find the math library.") + # No separate math library was found; accept that only if exp() is available without one, and reset HAVE_LIBM to empty in that case. + CHECK_FUNCTION_EXISTS(exp HAVE_LIBM_FUNC) + IF(NOT HAVE_LIBM_FUNC) + MESSAGE(FATAL_ERROR "Unable to find the math library.") + ELSE(NOT HAVE_LIBM_FUNC) + SET(HAVE_LIBM "") + ENDIF() + ELSE(NOT HAVE_LIBM) + MESSAGE(STATUS "Found Math library: ${HAVE_LIBM}") ENDIF() ENDIF() # Option to Enable DAP long tests, remote tests. OPTION(ENABLE_DAP_REMOTE_TESTS "Enable DAP remote tests." ON) +OPTION(ENABLE_EXTERNAL_SERVER_TESTS "Enable external Server remote tests." OFF) OPTION(ENABLE_DAP_LONG_TESTS "Enable DAP long tests." OFF) SET(REMOTETESTSERVERS "remotetest.unidata.ucar.edu" CACHE STRING "test servers to use for remote test") # See if we have zlib FIND_PACKAGE(ZLIB) @@ -1725,7 +1733,7 @@ ENDIF() # Set some of the options as advanced.
MARK_AS_ADVANCED(ENABLE_INTERNAL_DOCS VALGRIND_TESTS ENABLE_COVERAGE_TESTS ) -MARK_AS_ADVANCED(ENABLE_DAP_REMOTE_TESTS ENABLE_DAP_LONG_TESTS USE_REMOTE_CDASH) +MARK_AS_ADVANCED(ENABLE_DAP_REMOTE_TESTS ENABLE_DAP_LONG_TESTS USE_REMOTE_CDASH ENABLE_EXTERNAL_SERVER_TESTS) MARK_AS_ADVANCED(ENABLE_DOXYGEN_BUILD_RELEASE_DOCS DOXYGEN_ENABLE_TASKS ENABLE_DOXYGEN_SERVER_SIDE_SEARCH) MARK_AS_ADVANCED(ENABLE_SHARED_LIBRARY_VERSION) diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index 85a130730..5cdbf49b0 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -9,6 +9,9 @@ This file contains a high-level description of this package's evolution. Release * [Bug Fix] Fix blosc plugin errors. See [Github #2461](https://github.com/Unidata/netcdf-c/pull/2461). * [Bug Fix] Fix support for reading arrays of HDF5 fixed size strings. See [Github #2466](https://github.com/Unidata/netcdf-c/pull/2466). +* [Enhancement] Add support for Zarr (fixed length) string type in nczarr. See [Github #2492](https://github.com/Unidata/netcdf-c/pull/2492). +* [Bug Fix] Split the remote tests into two parts: one for the remotetest server and one for all other external servers. Also add a configure option to enable the latter set. See [Github #2491](https://github.com/Unidata/netcdf-c/pull/2491). +* [Bug Fix] Fix support for reading arrays of HDF5 fixed size strings. See [Github #2466](https://github.com/Unidata/netcdf-c/pull/2466). * [Bug Fix] Provide a default enum const when fill value does not match any enum constant for the value zero. See [Github #2462](https://github.com/Unidata/netcdf-c/pull/2462). * [Bug Fix] Fix the json submodule symbol conflicts between libnetcdf and the plugin specific netcdf_json.h. See [Github #2448](https://github.com/Unidata/netcdf-c/pull/2448). * [Bug Fix] Fix quantize with CLASSIC_MODEL files. See [Github #2405](https://github.com/Unidata/netcdf-c/pull/2445). @@ -28,7 +31,7 @@ This file contains a high-level description of this package's evolution.
Release * [Enhancement] Allow the read/write of JSON-valued Zarr attributes to allow for domain specific info such as used by GDAL/Zarr. See [Github #2278](https://github.com/Unidata/netcdf-c/pull/2278). * [Enhancement] Turn on the XArray convention for NCZarr files by default. WARNING, this means that the mode should explicitly specify "nczarr" or "zarr" even if "xarray" or "noxarray" is specified. See [Github #2257](https://github.com/Unidata/netcdf-c/pull/2257). - +* [Enhancement] Update the documentation to match the current filter capabilities See [Github #2249](https://github.com/Unidata/netcdf-c/pull/2249). * [Enhancement] Update the documentation to match the current filter capabilities. See [Github #2249](https://github.com/Unidata/netcdf-c/pull/2249). * [Enhancement] Support installation of pre-built standard filters into user-specified location. See [Github #2318](https://github.com/Unidata/netcdf-c/pull/2318). * [Enhancement] Improve filter support. More specifically (1) add nc_inq_filter_avail to check if a filter is available, (2) add the notion of standard filters, (3) cleanup szip support to fix interaction with NCZarr. See [Github #2245](https://github.com/Unidata/netcdf-c/pull/2245). diff --git a/config.h.cmake.in b/config.h.cmake.in index 97004f62c..7273971f3 100644 --- a/config.h.cmake.in +++ b/config.h.cmake.in @@ -441,6 +441,9 @@ with zip */ /* if true, HDF5 is at least version 1.10.5 and supports UTF8 paths */ #cmakedefine HDF5_UTF8_PATHS 1 +/* if true, backtrace support will be used. 
*/ +#cmakedefine HAVE_EXECINFO_H 1 + /* if true, include JNA bug fix */ #cmakedefine JNA 1 diff --git a/configure.ac b/configure.ac index 5094aefa1..31322216a 100644 --- a/configure.ac +++ b/configure.ac @@ -603,32 +603,53 @@ AM_CONDITIONAL(ENABLE_QUANTIZE, [test x$enable_quantize = xyes]) # --enable-dap => enable-dap4 enable_dap4=$enable_dap -AC_MSG_CHECKING([whether dap remote testing should be enabled]) +AC_MSG_CHECKING([whether dap use of remotetest server should be enabled]) AC_ARG_ENABLE([dap-remote-tests], - [AS_HELP_STRING([--enable-dap-remote-tests], - [enable dap remote tests])]) -test "x$enable_dap_remote_tests" = xno || enable_dap_remote_tests=yes + [AS_HELP_STRING([--disable-dap-remote-tests], + [disable dap remote tests])]) +# Default off +test "x$enable_dap_remote_tests" = xyes || enable_dap_remote_tests=no if test "x$enable_dap" = "xno" ; then enable_dap_remote_tests=no fi AC_MSG_RESULT($enable_dap_remote_tests) +AC_MSG_CHECKING([whether dap use of remotetest server should be enabled]) +AC_ARG_ENABLE([dap-remote-tests], + [AS_HELP_STRING([--disable-dap-remote-tests], + [disable dap remote tests])]) +test "x$enable_dap_remote_tests" = xno || enable_dap_remote_tests=yes +AC_MSG_RESULT($enable_dap_remote_tests) + +AC_MSG_CHECKING([whether use of external servers should be enabled]) +AC_ARG_ENABLE([external-server-tests], + [AS_HELP_STRING([--enable-external-server-tests (default off)], + [enable external server tests])]) +test "x$enable_external_server_tests" = xyes || enable_external_server_tests=no +AC_MSG_RESULT($enable_external_server_tests) + +if test "x$enable_dap_remote_tests" = "xno" ; then + AC_MSG_NOTICE([--disable-dap_remote_tests => --disable-external-server-tests]) + enable_external_server_tests=no +fi + # Default is not to do the remote authorization tests. 
-AC_MSG_CHECKING([whether dap remote authorization testing should be enabled (default off)]) +AC_MSG_CHECKING([whether dap authorization testing should be enabled (default off)]) AC_ARG_ENABLE([dap-auth-tests], [AS_HELP_STRING([--enable-dap-auth-tests], [enable dap remote authorization tests])]) test "x$enable_dap_auth_tests" = xyes || enable_dap_auth_tests=no -# dap must be enabled -if test "x$enable_dap" = "xno" ; then - enable_dap_auth_tests=no -fi -# if remote tests are disabled, then so is this -if test "x$enable_dap_remote_tests" = "xno" ; then - enable_dap_remote_tests=no -fi AC_MSG_RESULT($enable_dap_auth_tests) +# dap must be enabled + +if test "x$enable_dap" = "xno" ; then + AC_MSG_NOTICE([--disable-dap => --disable-dap-remote-tests --disable-auth-tests --disable-external-server-tests]) + enable_dap_remote_tests=no + enable_dap_auth_tests=no + enable_external_server_tests=no +fi + # Did the user specify a list of test servers to try for remote tests? AC_MSG_CHECKING([which remote test server(s) to use]) AC_ARG_WITH([testservers], @@ -652,16 +673,20 @@ fi if test "x$enable_dap_remote_tests" = xyes; then AC_DEFINE([ENABLE_DAP_REMOTE_TESTS], [1], [if true, do remote tests]) fi +if test "x$enable_external_server_tests" = xyes; then + AC_DEFINE([ENABLE_EXTERNAL_SERVER_TESTS], [1], [if true, do remote external tests]) +fi AC_MSG_CHECKING([whether the time-consuming dap tests should be enabled (default off)]) AC_ARG_ENABLE([dap-long-tests], [AS_HELP_STRING([--enable-dap-long-tests], [enable dap long tests])]) test "x$enable_dap_long_tests" = xyes || enable_dap_long_tests=no -if test "x$enable_dap_remote_tests" = "xno" ; then +AC_MSG_RESULT([$enable_dap_long_tests]) +if test "x$enable_dap_remote_tests" = "xno" || test "x$enable_external_server_tests" = "xno" ; then + AC_MSG_NOTICE([--disable-dap-remote|external-server-tests => --disable_dap_long_tests]) enable_dap_long_tests=no fi -AC_MSG_RESULT([$enable_dap_long_tests]) # Control zarr storage if test 
"x$enable_nczarr" = xyes ; then @@ -1757,6 +1782,7 @@ AM_CONDITIONAL(ENABLE_DAP4, [test "x$enable_dap4" = xyes]) AM_CONDITIONAL(USE_STRICT_NULL_BYTE_HEADER_PADDING, [test x$enable_strict_null_byte_header_padding = xyes]) AM_CONDITIONAL(ENABLE_CDF5, [test "x$enable_cdf5" = xyes]) AM_CONDITIONAL(ENABLE_DAP_REMOTE_TESTS, [test "x$enable_dap_remote_tests" = xyes]) +AM_CONDITIONAL(ENABLE_EXTERNAL_SERVER_TESTS, [test "x$enable_external_server_tests" = xyes]) AM_CONDITIONAL(ENABLE_DAP_AUTH_TESTS, [test "x$enable_dap_auth_tests" = xyes]) AM_CONDITIONAL(ENABLE_DAP_LONG_TESTS, [test "x$enable_dap_long_tests" = xyes]) AM_CONDITIONAL(USE_PNETCDF_DIR, [test ! "x$PNETCDFDIR" = x]) diff --git a/dap4_test/test_thredds.sh b/dap4_test/test_thredds.sh index e6c949ac5..5f5fd4726 100755 --- a/dap4_test/test_thredds.sh +++ b/dap4_test/test_thredds.sh @@ -36,6 +36,7 @@ failure() { setresultdir results_test_thredds if test "x${RESET}" = x1 ; then rm -fr ${BASELINEH}/*.thredds ; fi +if test "x$FEATURE_THREDDSTEST" = x1 ; then for f in $F ; do makeurl "dap4://thredds-test.unidata.ucar.edu/thredds/dap4/casestudies" "$f" echo "testing: $URL" @@ -51,6 +52,7 @@ for f in $F ; do cp ./results_test_thredds/${base}.thredds ${BASELINETH}/${base}.thredds fi done +fi # FEATURE_THREDDSTEST #rm -fr "${GOESLONG}" diff --git a/docs/nczarr.md b/docs/nczarr.md index 95e6c5589..eb7cd46b2 100644 --- a/docs/nczarr.md +++ b/docs/nczarr.md @@ -13,12 +13,10 @@ This extension provides a mapping from a subset of the full netCDF Enhanced (aka The NetCDF version of this storage format is called NCZarr <a href="#ref_nczarr">[4]</a>. A note on terminology in this document. - 1. The term "dataset" is used to refer to all of the Zarr objects constituting the meta-data and data. There are some important "caveats" of which to be aware when using this software. - 1. NCZarr currently is not thread-safe. So any attempt to use it with parallelism, including MPIO, is likely to fail. 
# The NCZarr Data Model {#nczarr_data_model} @@ -35,28 +33,29 @@ Specifically the XArray ''\_ARRAY\_DIMENSIONS'' attribute is one such. There are two other, secondary assumption: 1. The actual storage format in which the dataset is stored -- a zip file, for example -- can be read by the _Zarr_ implementation. -2. The filters used by the dataset can be encoded/decoded by the implementation. +2. The compressors (aka filters) used by the dataset can be encoded/decoded by the implementation. NCZarr uses HDF5-style filters, so ensuring access to such filters is somewhat complicated. See [the companion document on +filters](./md_filters.html "filters") for details. -Briefly, the data model supported by NCZarr is netcdf-4 minus the user-defined types and the String type. -As with netcdf-4 chunking is supported. -Filters and compression are supported, but -[the companion document on filters](./md_filters.html "filters") -should be consulted for the details. +Briefly, the data model supported by NCZarr is netcdf-4 minus +the user-defined types. However, a restricted form of String type +is supported (see Appendix H). +As with netcdf-4 chunking is supported. Filters and compression +are also [supported](./md_filters.html "filters"). Specifically, the model supports the following. -- "Atomic" types: char, byte, ubyte, short, ushort, int, uint, int64, uint64. +- "Atomic" types: char, byte, ubyte, short, ushort, int, uint, int64, uint64, string. - Shared (named) dimensions - Attributes with specified types -- both global and per-variable - Chunking - Fill values - Groups - N-Dimensional variables +- Scalar variables - Per-variable endianness (big or little) - Filters (including compression) With respect to full netCDF-4, the following concepts are currently unsupported. -- String type - User-defined types (enum, opaque, VLEN, and Compound) - Unlimited dimensions - Contiguous or compact storage @@ -66,6 +65,15 @@ because they are HDF5 specific. 
When specified, they are treated as chunked where the file consists of only one chunk. This means that testing for contiguous or compact is not possible; the _nc_inq_var_chunking_ function will always return NC_CHUNKED and the chunksizes will be the same as the dimension sizes of the variable's dimensions. +Additionally, it should be noted that NCZarr supports scalar variables, +but Zarr does not; Zarr only supports dimensioned variables. +In order to support interoperability, NCZarr does the following. +1. A scalar variable is recorded in the Zarr metadata as if it has a shape of **[1]**. +2. A note is stored in the NCZarr metadata that this is actually a netCDF scalar variable. + +These actions allow NCZarr to properly show scalars in its API while still +maintaining compatibility with Zarr. + # Enabling NCZarr Support {#nczarr_enable} NCZarr support is enabled by default. @@ -322,7 +330,6 @@ aws_secret_access_key=YYYY... ``` See Appendix E for additional information. - ## Addressing Style The notion of "addressing style" may need some expansion. @@ -378,14 +385,14 @@ of NCZarr specific information. These keys are as follows: -_\_NCZARR_SUPERBLOCK\__ -- this is in the top level group -- key _/.zarr_. +_\_nczarr_superblock\__ -- this is in the top level group -- key _/.zarr_. It is in effect the "superblock" for the dataset and contains any netcdf specific dataset level information. It is also used to verify that a given key is the root of a dataset. Currently it contains the following key(s): * "version" -- the NCZarr version defining the format of the dataset. -_\_NCZARR_GROUP\__ -- this key appears in every _.zgroup_ object. +_\_nczarr_group\__ -- this key appears in every _.zgroup_ object. It contains any netcdf specific group information. Specifically it contains the following keys: * "dims" -- the name and size of shared dimensions defined in this group. 
@@ -393,13 +400,13 @@ Specifically it contains the following keys: * "groups" -- the name of sub-groups defined in this group. These lists allow walking the NCZarr dataset without having to use the potentially costly search operation. -_\_NCZARR_ARRAY\__ -- this key appears in every _.zarray_ object. +_\_nczarr_array\__ -- this key appears in every _.zarray_ object. It contains netcdf specific array information. Specifically it contains the following keys: * dimrefs -- the names of the shared dimensions referenced by the variable. * storage -- indicates if the variable is chunked vs contiguous in the netcdf sense. -_\_NCZARR_ATTR\__ -- this key appears in every _.zattr_ object. +_\_nczarr_attr\__ -- this key appears in every _.zattr_ object. This means that technically, it is attribute, but one for which access is normally surpressed . Specifically it contains the following keys: @@ -412,17 +419,17 @@ The latter case, zarr reading nczarr is possible if the zarr library is willing The former case, nczarr reading zarr is also possible if the nczarr can simulate or infer the contents of the missing _\_NCZARR\_XXX_ objects. As a rule this can be done as follows. -1. _\_NCZARR_GROUP\__ -- The list of contained variables and sub-groups can be computed using the search API to list the keys "contained" in the key for a group. +1. _\_nczarr_group\__ -- The list of contained variables and sub-groups can be computed using the search API to list the keys "contained" in the key for a group. The search looks for occurrences of _.zgroup_, _.zattr_, _.zarray_ to infer the keys for the contained groups, attribute sets, and arrays (variables). Constructing the set of "shared dimensions" is carried out by walking all the variables in the whole dataset and collecting the set of unique integer shapes for the variables. For each such dimension length, a top level dimension is created named ".zdim_<len>" where len is the integer length. -2. 
_\_NCZARR_ARRAY\__ -- The dimrefs are inferred by using the shape +2. _\_nczarr_array\__ -- The dimrefs are inferred by using the shape in _.zarray_ and creating references to the simulated shared dimension. netcdf specific information. -3. _\_NCZARR_ATTR\__ -- The type of each attribute is inferred by trying to parse the first attribute value string. +3. _\_nczarr_attr\__ -- The type of each attribute is inferred by trying to parse the first attribute value string. # Compatibility {#nczarr_compatibility} @@ -434,7 +441,7 @@ The Xarray <a href="#ref_xarray">[7]</a> Zarr implementation uses its own mechan It uses a special attribute named ''_ARRAY_DIMENSIONS''. The value of this attribute is a list of dimension names (strings). An example might be ````["time", "lon", "lat"]````. -It is essentially equivalent to the ````_NCZARR_ARRAY "dimrefs" list````, except that the latter uses fully qualified names so the referenced dimensions can be anywhere in the dataset. +It is essentially equivalent to the ````_nczarr_array "dimrefs" list````, except that the latter uses fully qualified names so the referenced dimensions can be anywhere in the dataset. As of _netcdf-c_ version 4.8.2, The Xarray ''_ARRAY_DIMENSIONS'' attribute is supported for both NCZarr and pure Zarr. If possible, this attribute will be read/written by default, @@ -778,34 +785,169 @@ The version 1 format defines three specific objects: _.nczgroup_, _.nczarray_,_. These are stored in parallel with the corresponding Zarr objects. So if there is a key of the form "/x/y/.zarray", then there is also a key "/x/y/.nczarray". The content of these objects is the same as the contents of the corresponding keys. So the value of the ''_NCZARR_ARRAY'' key is the same as the content of the ''.nczarray'' object. 
The list of connections is as follows: -* ''.nczarr'' <=> ''_NCZARR_SUPERBLOCK_'' -* ''.nczgroup <=> ''_NCZARR_GROUP_'' -* ''.nczarray <=> ''_NCZARR_ARRAY_'' -* ''.nczattr <=> ''_NCZARR_ATTR_'' +* ''.nczarr'' <=> ''_nczarr_superblock_'' +* ''.nczgroup <=> ''_nczarr_group_'' +* ''.nczarray <=> ''_nczarr_array_'' +* ''.nczattr <=> ''_nczarr_attr_'' # Appendix G. JSON Attribute Convention. {#nczarr_json} -An attribute may be encountered on read whose value when parsed -by JSON is a dictionary. As a special conventions, the value -converted to a string and stored as the value of the attribute -and the type of the attribute is treated as char. +The Zarr V2 specification is somewhat vague on what is a legal +value for an attribute. The examples all show one of two cases: +1. A simple JSON scalar atomic values (e.g. int, float, char, etc), or +2. A JSON array of such values. -When writing a character valued attribute, it's value is examined -to see if it looks like a JSON dictionary (i.e. "{...}") -and is parseable as JSON. -If so, then the attribute value is treated as one long string, -parsed as JSON, and stored in the .zattr file in JSON form. - -These conventions are intended to help support various +However, the Zarr specification can be read to infer that the value +can in fact be any legal JSON expression. +This "convention" is currently used routinely to help support various attributes created by other packages where the attribute is a -complex JSON dictionary. An example is the GDAL Driver -convention <a href="#ref_gdal">[12]</a>. The value is a complex -JSON dictionary and it is desirable to both read and write that kind of -information through the netcdf API. +complex JSON expression. An example is the GDAL Driver +convention <a href="#ref_gdal">[12]</a>, where the value is a complex +JSON dictionary. + +In order for NCZarr to be as consistent as possible with Zarr Version 2, +it is desirable to support this convention for attribute values. 
+This means that there must be some way to handle an attribute +whose value is not either of the two cases above. That is, its value +is some more complex JSON expression. Ideally both reading and writing +of such attributes should be supported. + +One more point. NCZarr attempts to record the associated netcdf +attribute type (encoded in the form of a NumPy "dtype") for each +attribute. This information is stored as NCZarr-specific +metadata. Note that pure Zarr makes no attempt to record such +type information. + +The current algorithm to support JSON valued attributes +operates as follows. + +## Writing an attribute: +There are multiple cases to consider. + +1. The netcdf attribute **is not** of type NC_CHAR and its value is a single atomic value. + * Convert to an equivalent JSON atomic value and write that JSON expression. + * Compute the Zarr equivalent dtype and store in the NCZarr metadata. + +2. The netcdf attribute **is not** of type NC_CHAR and its value is a vector of atomic values. + * Convert to an equivalent JSON array of atomic values and write that JSON expression. + * Compute the Zarr equivalent dtype and store in the NCZarr metadata. + +3. The netcdf attribute **is** of type NC_CHAR and its value – taken as a single sequence of characters – +**is** parseable as a legal JSON expression. + * Parse to produce a JSON expression and write that expression. + * Use "|S1" as the dtype and store in the NCZarr metadata. + +4. The netcdf attribute **is** of type NC_CHAR and its value – taken as a single sequence of characters – +**is not** parseable as a legal JSON expression. + * Convert to a JSON string and write that expression. + * Use "|S1" as the dtype and store in the NCZarr metadata. + +## Reading an attribute: + +The process of reading and interpreting an attribute value requires two +pieces of information.
+* The value of the attribute as a JSON expression, and +* The optional associated dtype of the attribute; note that this may not exist +if, for example, the file is pure zarr. + +Given these two pieces of information, the read process is as follows. + +1. The JSON expression is a simple JSON atomic value. + * If the dtype is defined, then convert the JSON to that type of data, +and then store it as the equivalent netcdf vector of size one. + * If the dtype is not defined, then infer the dtype based on the JSON value, +and then store it as the equivalent netcdf vector of size one. + +2. The JSON expression is an array of simple JSON atomic values. + * If the dtype is defined, then convert each JSON value in the array to that type of data, +and then store it as the equivalent netcdf vector. + * If the dtype is not defined, then infer the dtype based on the first JSON value in the array, +and then store it as the equivalent netcdf vector. + +3. The JSON expression is an array some of whose values are dictionaries or (sub-)arrays. + * Un-parse the expression to an equivalent sequence of characters, and then store it as of type NC_CHAR. + +4. The JSON expression is a dictionary. + * Un-parse the expression to an equivalent sequence of characters, and then store it as of type NC_CHAR. + +## Notes + +1. If a character valued attribute's value can be parsed as a legal JSON expression, then it will be stored as such. +2. Reading and writing are *almost* idempotent in that the sequence of +actions "read-write-read" is equivalent to a single "read" and "write-read-write" is equivalent to a single "write". +The "almost" caveat is necessary because (1) whitespace may be added or lost during the sequence of operations, +and (2) numeric precision may change. + +# Appendix H. Support for string types + +Zarr supports a string type, but it is restricted to +fixed size strings.
NCZarr also supports such strings, +but there are some differences in order to interoperate +with the netcdf-4/HDF5 variable length strings. + +The primary issue to be addressed is to provide a way for the user +to specify the maximum size of the fixed length strings. This is +handled by providing the following new attributes: +1. **_nczarr_default_maxstrlen** — +This is an attribute of the root group. It specifies the default +maximum string length for string types. If not specified, then +it has the value of 64 characters. +2. **_nczarr_maxstrlen** — +This is a per-variable attribute. It specifies the maximum +string length for the string type associated with the variable. +If not specified, then it is assigned the value of +**_nczarr_default_maxstrlen**. + +Note that when accessing a string through the netCDF API, the +fixed length strings appear as variable length strings. This +means that they are stored as pointers to the string +(i.e. **char\***) and with a trailing nul character. +One consequence is that if the user writes a variable length +string through the netCDF API, and the length of that string +is greater than the maximum string length for a variable, +then the string is silently truncated. +Another consequence is that the user must reclaim the string storage. + +Adding strings also requires some hacking to handle the existing +netcdf-c NC_CHAR type, which does not exist in Zarr. The goal +was to choose NumPy types for both the netcdf-c NC_STRING type +and the netcdf-c NC_CHAR type such that if a pure zarr +implementation reads them, it will still work. + +For writing variables and NCZarr attributes, the type mapping is as follows: +* "|S1" for NC_CHAR. +* ">S1" for NC_STRING && MAXSTRLEN==1 +* ">Sn" for NC_STRING && MAXSTRLEN==n + +Note that it is a bit of a hack to use endianness, but it should be ok since for +string/char, the endianness has no meaning.
+ +So when reading data with a pure zarr implementation +the above types should always appear as strings, +and the type that signals NC_CHAR (in NCZarr) +would be handled by Zarr as a string of length 1. + +# Change Log {#nczarr_changelog} + +Note, this log was only started as of 8/11/2022 and is not +intended to be a detailed chronology. Rather, it provides highlights +that will be of interest to NCZarr users. In order to see exact changes, +it is necessary to use the 'git diff' command. + +## 8/29/2022 +1. Zarr fixed-size string types are now supported. + +## 8/11/2022 +1. The NCZarr specific keys have been converted to lower-case +(e.g. "_nczarr_attr" instead of "_NCZARR_ATTR"). Upper case is +accepted for back compatibility. + +2. The legal values of an attribute have been extended to +include arbitrary JSON expressions; see Appendix G for more details. # Point of Contact {#nczarr_poc} __Author__: Dennis Heimbigner<br> __Email__: dmh at ucar dot edu<br> __Initial Version__: 4/10/2020<br> -__Last Revised__: 7/16/2021 +__Last Revised__: 8/27/2022 diff --git a/include/nc4internal.h b/include/nc4internal.h index 9b76c135a..2833e0c5a 100644 --- a/include/nc4internal.h +++ b/include/nc4internal.h @@ -106,9 +106,6 @@ typedef enum {NCNAT, NCVAR, NCDIM, NCATT, NCTYP, NCFLD, NCGRP, NCFIL} NC_SORT; /** Subset of readonly flags; readable by name only thru the API. */ #define NAMEONLYFLAG 4 -/** Subset of readonly flags; Value is actually in file.
*/ -#define MATERIALIZEDFLAG 8 - /** Per-variable attribute, as opposed to global */ #define VARFLAG 16 @@ -492,9 +489,13 @@ extern void NC_freeglobalstate(void); #define NC_ATT_COORDINATES "_Netcdf4Coordinates" /*see hdf5internal.h:COORDINATES*/ #define NC_ATT_FORMAT "_Format" #define NC_ATT_DIMID_NAME "_Netcdf4Dimid" +#define NC_ATT_FILLVALUE "_FillValue" #define NC_ATT_NC3_STRICT_NAME "_nc3_strict" #define NC_XARRAY_DIMS "_ARRAY_DIMENSIONS" #define NC_ATT_CODECS "_Codecs" -#define NC_NCZARR_ATTR "_NCZARR_ATTR" +#define NC_NCZARR_ATTR "_nczarr_attr" +#define NC_NCZARR_ATTR_UC "_NCZARR_ATTR" +#define NC_NCZARR_MAXSTRLEN_ATTR "_nczarr_maxstrlen" +#define NC_NCZARR_DEFAULT_MAXSTRLEN_ATTR "_nczarr_default_maxstrlen" #endif /* _NC4INTERNAL_ */ diff --git a/include/ncconfigure.h b/include/ncconfigure.h index c0679bc30..77d34ac4c 100644 --- a/include/ncconfigure.h +++ b/include/ncconfigure.h @@ -50,9 +50,10 @@ extern "C" { #endif /* WARNING: in some systems, these functions may be defined as macros, so check */ -#ifndef HAVE_STRDUP #ifndef strdup +#ifndef HAVE_STRDUP char* strdup(const char*); +#define HAVE_STRDUP #endif #endif @@ -120,11 +121,7 @@ unsigned long long int strtoull(const char*, char**, int); /* handle null arguments */ #ifndef nulldup -#ifdef HAVE_STRDUP #define nulldup(s) ((s)==NULL?NULL:strdup(s)) -#else -extern char *nulldup(const char* s); -#endif #endif #ifndef nulllen diff --git a/include/ncjson.h b/include/ncjson.h index c4974cfb5..32b050f06 100644 --- a/include/ncjson.h +++ b/include/ncjson.h @@ -64,6 +64,7 @@ typedef struct NCjson { don't use union so we can know when to reclaim sval */ struct NCJconst {int bval; long long ival; double dval; char* sval;}; +#define NCJconst_empty {0,0,0.0,NULL} /**************************************************/ /* Extended API */ @@ -116,6 +117,8 @@ OPTEXPORT int NCJclone(const NCjson* json, NCjson** clonep); #ifndef NETCDF_JSON_H /* dump NCjson* object to output file */ OPTEXPORT void NCJdump(const NCjson* 
json, unsigned flags, FILE*); +/* convert NCjson* object to output string */ +OPTEXPORT const char* NCJtotext(const NCjson* json); #endif #if defined(__cplusplus) diff --git a/include/netcdf_json.h b/include/netcdf_json.h index d53944a8a..33eabe460 100644 --- a/include/netcdf_json.h +++ b/include/netcdf_json.h @@ -2,9 +2,19 @@ See the COPYRIGHT file for more information. */ + #ifndef NETCDF_JSON_H #define NETCDF_JSON_H 1 +/* +WARNING: +If you modify this file, +then you need to go to +the include/ directory +and do the command: + make makepluginjson +*/ + /* Inside libnetcdf and for plugins, export the json symbols */ #ifndef DLLEXPORT #ifdef _WIN32 @@ -54,6 +64,7 @@ typedef struct NCjson { don't use union so we can know when to reclaim sval */ struct NCJconst {int bval; long long ival; double dval; char* sval;}; +#define NCJconst_empty {0,0,0.0,NULL} /**************************************************/ /* Extended API */ @@ -106,6 +117,8 @@ OPTEXPORT int NCJclone(const NCjson* json, NCjson** clonep); #ifndef NETCDF_JSON_H /* dump NCjson* object to output file */ OPTEXPORT void NCJdump(const NCjson* json, unsigned flags, FILE*); +/* convert NCjson* object to output string */ +OPTEXPORT const char* NCJtotext(const NCjson* json); #endif #if defined(__cplusplus) @@ -140,6 +153,18 @@ OPTEXPORT void NCJdump(const NCjson* json, unsigned flags, FILE*); TODO: make utf8 safe */ +/* +WARNING: +If you modify this file, +then you need to go to +the include/ directory +and do the command: + make makenetcdfjson +*/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif #include <stdlib.h> #include <stdio.h> #include <string.h> @@ -235,7 +260,7 @@ static int bytesappendquoted(NCJbuf* buf, const char* s); static int bytesappend(NCJbuf* buf, const char* s); static int bytesappendc(NCJbuf* bufp, const char c); -/* Hide these for plugins */ +/* Hide everything for plugins */ #ifdef NETCDF_JSON_H #define OPTSTATIC static static int NCJparsen(size_t len, const char* text, unsigned
flags, NCjson** jsonp); @@ -266,10 +291,6 @@ NCJparsen(size_t len, const char* text, unsigned flags, NCjson** jsonp) NCJparser* parser = NULL; NCjson* json = NULL; - /* Need at least 1 character of input */ - if(len == 0 || text == NULL) - {stat = NCJTHROW(NCJ_ERR); goto done;} - if(jsonp == NULL) goto done; parser = calloc(1,sizeof(NCJparser)); if(parser == NULL) {stat = NCJTHROW(NCJ_ERR); goto done;} @@ -277,6 +298,16 @@ NCJparsen(size_t len, const char* text, unsigned flags, NCjson** jsonp) if(parser->text == NULL) {stat = NCJTHROW(NCJ_ERR); goto done;} memcpy(parser->text,text,len); + /* trim trailing whitespace */ + if(len > 0) { + char* p; + for(p=parser->text+(len-1);p >= parser->text;p--) { + if((unsigned char)*p > ' ') break; /* plain char may be signed: keep bytes >= 0x80 (UTF-8) */ + } + len = (size_t)((p - parser->text) + 1); + } + if(len == 0) + {stat = NCJTHROW(NCJ_ERR); goto done;} parser->text[len] = '\0'; parser->text[len+1] = '\0'; parser->pos = &parser->text[0]; @@ -285,6 +316,8 @@ NCJparsen(size_t len, const char* text, unsigned flags, NCjson** jsonp) fprintf(stderr,"json: |%s|\n",parser->text); #endif if((stat=NCJparseR(parser,&json))==NCJ_ERR) goto done; + /* Must consume all of the input */ + if(parser->pos != (parser->text+len)) {stat = NCJ_ERR; goto done;} *jsonp = json; json = NULL; @@ -1186,16 +1219,32 @@ NCJdump(const NCjson* json, unsigned flags, FILE* out) nullfree(text); } +OPTSTATIC const char* +NCJtotext(const NCjson* json) +{ + static char outtext[4096]; + char* text = NULL; + if(json == NULL) {strcpy(outtext,"<null>"); goto done;} + (void)NCJunparse(json,0,&text); + outtext[0] = '\0'; + if(text != NULL) strlcat(outtext,text,sizeof(outtext)); /* NCJunparse status is ignored, so text may still be NULL */ + nullfree(text); +done: + return outtext; +} + /* Hack to avoid static unused warning */ -void +static void netcdf_supresswarnings(void) { void* ignore; - ignore = (void*)NCJdump; + ignore = (void*)netcdf_supresswarnings; ignore = (void*)NCJinsert; ignore = (void*)NCJaddstring; ignore = (void*)NCJcvt; ignore = (void*)NCJdictget; ignore = (void*)NCJparse; + ignore = (void*)NCJdump;
+ ignore = (void*)NCJtotext; ignore = ignore; } diff --git a/libdap4/ncd4dispatch.c b/libdap4/ncd4dispatch.c index f875c641e..616f9c905 100644 --- a/libdap4/ncd4dispatch.c +++ b/libdap4/ncd4dispatch.c @@ -43,7 +43,7 @@ static const NC_reservedatt NCD4_reserved[] = { {D4CHECKSUMATTR, READONLYFLAG|NAMEONLYFLAG}, /*_DAP4_Checksum_CRC32*/ {D4LEATTR, READONLYFLAG|NAMEONLYFLAG}, /*_DAP4_Little_Endian*/ /* Also need to include the provenance attributes */ - {NCPROPS, READONLYFLAG|NAMEONLYFLAG|MATERIALIZEDFLAG}, /*_NCProperties*/ + {NCPROPS, READONLYFLAG|NAMEONLYFLAG}, /*_NCProperties*/ {NULL, 0} }; diff --git a/libdispatch/dinfermodel.c b/libdispatch/dinfermodel.c index d991b9e0f..74fd55a4f 100644 --- a/libdispatch/dinfermodel.c +++ b/libdispatch/dinfermodel.c @@ -907,7 +907,7 @@ NC_infermodel(const char* path, int* omodep, int iscreate, int useparallel, void if(!modelcomplete(model)) { const char** p = ncurifragmentparams(uri); /* envv format */ if(p != NULL) { - for(;*p;p++) { + for(;*p;p+=2) { const char* key = p[0]; const char* value = p[1];; if((stat=processfragmentkeys(key,value,model))) goto done; diff --git a/libdispatch/ncjson.c b/libdispatch/ncjson.c index b716fbdec..0ebd515aa 100644 --- a/libdispatch/ncjson.c +++ b/libdispatch/ncjson.c @@ -15,6 +15,9 @@ and do the command: make makenetcdfjson */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif #include <stdlib.h> #include <stdio.h> #include <string.h> @@ -111,7 +114,7 @@ static int bytesappendquoted(NCJbuf* buf, const char* s); static int bytesappend(NCJbuf* buf, const char* s); static int bytesappendc(NCJbuf* bufp, const char c); -/* Hide these for plugins */ +/* Hide everything for plugins */ #ifdef NETCDF_JSON_H #define OPTSTATIC static static int NCJparsen(size_t len, const char* text, unsigned flags, NCjson** jsonp); @@ -142,10 +145,6 @@ NCJparsen(size_t len, const char* text, unsigned flags, NCjson** jsonp) NCJparser* parser = NULL; NCjson* json = NULL; - /* Need at least 1 character of input */ - 
if(len == 0 || text == NULL) - {stat = NCJTHROW(NCJ_ERR); goto done;} - if(jsonp == NULL) goto done; parser = calloc(1,sizeof(NCJparser)); if(parser == NULL) {stat = NCJTHROW(NCJ_ERR); goto done;} @@ -153,6 +152,16 @@ NCJparsen(size_t len, const char* text, unsigned flags, NCjson** jsonp) if(parser->text == NULL) {stat = NCJTHROW(NCJ_ERR); goto done;} memcpy(parser->text,text,len); + /* trim trailing whitespace */ + if(len > 0) { + char* p; + for(p=parser->text+(len-1);p >= parser->text;p--) { + if((unsigned char)*p > ' ') break; /* plain char may be signed: keep bytes >= 0x80 (UTF-8) */ + } + len = (size_t)((p - parser->text) + 1); + } + if(len == 0) + {stat = NCJTHROW(NCJ_ERR); goto done;} parser->text[len] = '\0'; parser->text[len+1] = '\0'; parser->pos = &parser->text[0]; @@ -161,6 +170,8 @@ NCJparsen(size_t len, const char* text, unsigned flags, NCjson** jsonp) fprintf(stderr,"json: |%s|\n",parser->text); #endif if((stat=NCJparseR(parser,&json))==NCJ_ERR) goto done; + /* Must consume all of the input */ + if(parser->pos != (parser->text+len)) {stat = NCJ_ERR; goto done;} *jsonp = json; json = NULL; @@ -1062,17 +1073,32 @@ NCJdump(const NCjson* json, unsigned flags, FILE* out) nullfree(text); } +OPTSTATIC const char* +NCJtotext(const NCjson* json) +{ + static char outtext[4096]; + char* text = NULL; + if(json == NULL) {strcpy(outtext,"<null>"); goto done;} + (void)NCJunparse(json,0,&text); + outtext[0] = '\0'; + if(text != NULL) strlcat(outtext,text,sizeof(outtext)); /* NCJunparse status is ignored, so text may still be NULL */ + nullfree(text); +done: + return outtext; +} + /* Hack to avoid static unused warning */ static void netcdf_supresswarnings(void) { void* ignore; ignore = (void*)netcdf_supresswarnings; - ignore = (void*)NCJdump; ignore = (void*)NCJinsert; ignore = (void*)NCJaddstring; ignore = (void*)NCJcvt; ignore = (void*)NCJdictget; ignore = (void*)NCJparse; + ignore = (void*)NCJdump; + ignore = (void*)NCJtotext; ignore = ignore; } diff --git a/libdispatch/nclog.c b/libdispatch/nclog.c index 823c83228..b474b8c4b 100644 --- a/libdispatch/nclog.c +++ b/libdispatch/nclog.c @@ -32,7 +32,7 @@ #define
MAXTAGS 256 #define NCTAGDFALT "Log"; -#define NC_MAX_FRAMES 256 +#define NC_MAX_FRAMES 1024 static int nclogginginitialized = 0; diff --git a/libhdf5/hdf5debug.c b/libhdf5/hdf5debug.c index 4eed2c5bf..ca5c6cd68 100644 --- a/libhdf5/hdf5debug.c +++ b/libhdf5/hdf5debug.c @@ -5,7 +5,7 @@ #include "config.h" #include <stdarg.h> #include <stdio.h> -#if !defined _WIN32 && !defined __CYGWIN__ +#ifdef HAVE_EXECINFO_H #include <execinfo.h> #endif @@ -15,15 +15,18 @@ #define STSIZE 1000 +#ifdef HAVE_EXECINFO_H #ifdef H5BACKTRACE # if !defined _WIN32 && !defined __CYGWIN__ static void* stacktrace[STSIZE]; # endif #endif +#endif int nch5breakpoint(int err) { +#ifdef HAVE_EXECINFO_H #ifdef H5BACKTRACE # if !defined _WIN32 && !defined __CYGWIN__ int count = 0; @@ -39,6 +42,7 @@ nch5breakpoint(int err) if(trace != NULL) free(trace); # endif # endif +#endif #endif return err; } diff --git a/liblib/CMakeLists.txt b/liblib/CMakeLists.txt index aa3a842d9..e3eddc0fb 100644 --- a/liblib/CMakeLists.txt +++ b/liblib/CMakeLists.txt @@ -135,7 +135,7 @@ IF(ENABLE_PNETCDF AND PNETCDF) ENDIF() IF(ENABLE_S3_SDK) -# TARGET_LINK_DIRECTORIES(netcdf PUBLIC ${AWSSDK_LIB_DIR}) + TARGET_LINK_DIRECTORIES(netcdf PUBLIC ${AWSSDK_LIB_DIR}) TARGET_LINK_LIBRARIES(netcdf ${AWS_LINK_LIBRARIES}) ENDIF() diff --git a/libnczarr/zarr.c b/libnczarr/zarr.c index 671b0c2ac..05961c7e0 100644 --- a/libnczarr/zarr.c +++ b/libnczarr/zarr.c @@ -62,6 +62,8 @@ ncz_create_dataset(NC_FILE_INFO_T* file, NC_GRP_INFO_T* root, const char** contr &zinfo->zarr.nczarr_version.minor, &zinfo->zarr.nczarr_version.release); + zinfo->default_maxstrlen = NCZ_MAXSTR_DEFAULT; + /* Apply client controls */ if((stat = applycontrols(zinfo))) goto done; @@ -126,6 +128,7 @@ ncz_open_dataset(NC_FILE_INFO_T* file, const char** controls) zinfo->native_endianness = (NCZ_isLittleEndian() ? 
NC_ENDIAN_LITTLE : NC_ENDIAN_BIG); if((zinfo->envv_controls = NCZ_clonestringvec(0,controls))==NULL) /*0=>envv style*/ {stat = NC_ENOMEM; goto done;} + zinfo->default_maxstrlen = NCZ_MAXSTR_DEFAULT; /* Add struct to hold NCZ-specific group info. */ if (!(root->format_grp_info = calloc(1, sizeof(NCZ_GRP_INFO_T)))) @@ -288,75 +291,6 @@ done: } #endif -#if 0 -/** -@internal Rewrite attributes into a group or var -@param map - [in] the map object for storage -@param container - [in] the containing object -@param jattrs - [in] the json for .zattrs -@param jtypes - [in] the json for .ztypes -@return NC_NOERR -@author Dennis Heimbigner -*/ -int -ncz_unload_jatts(NCZ_FILE_INFO_T* zinfo, NC_OBJ* container, NCjson* jattrs, NCjson* jtypes) -{ - int stat = NC_NOERR; - char* fullpath = NULL; - char* akey = NULL; - char* tkey = NULL; - NCZMAP* map = zinfo->map; - - assert((NCJsort(jattrs) == NCJ_DICT)); - assert((NCJsort(jtypes) == NCJ_DICT)); - - if(container->sort == NCGRP) { - NC_GRP_INFO_T* grp = (NC_GRP_INFO_T*)container; - /* Get grp's fullpath name */ - if((stat = NCZ_grpkey(grp,&fullpath))) - goto done; - } else { - NC_VAR_INFO_T* var = (NC_VAR_INFO_T*)container; - /* Get var's fullpath name */ - if((stat = NCZ_varkey(var,&fullpath))) - goto done; - } - - /* Construct the path to the .zattrs object */ - if((stat = nczm_concat(fullpath,ZATTRS,&akey))) - goto done; - - /* Always write as V2 */ - - { - NCjson* k = NULL; - NCjson* v = NULL; - /* remove any previous version */ - if(!NCJremove(jattrs,NCZ_V2_ATTRS,&k,&v)) { - NCJreclaim(k); NCJreclaim(v); - } - } - - if(!(zinfo->controls.flags & FLAG_PUREZARR)) { - /* Insert the jtypes into the set of attributes */ - if((stat = NCJinsert(jattrs,NCZ_V2_ATTRS,jtypes))) goto done; - } - - /* Upload the .zattrs object */ - if((stat=NCZ_uploadjson(map,tkey,jattrs))) - goto done; - -done: - if(stat) { - NCJreclaim(jattrs); - NCJreclaim(jtypes); - } - nullfree(fullpath); - nullfree(akey); - nullfree(tkey); - return stat; -} -#endif 
static const char* controllookup(const char** envv_controls, const char* key) @@ -415,3 +349,75 @@ done: return stat; } +#if 0 +/** +@internal Rewrite attributes into a group or var +@param map - [in] the map object for storage +@param container - [in] the containing object +@param jattrs - [in] the json for .zattrs +@param jtypes - [in] the json for .ztypes +@return NC_NOERR +@author Dennis Heimbigner +*/ +int +ncz_unload_jatts(NCZ_FILE_INFO_T* zinfo, NC_OBJ* container, NCjson* jattrs, NCjson* jtypes) +{ + int stat = NC_NOERR; + char* fullpath = NULL; + char* akey = NULL; + char* tkey = NULL; + NCZMAP* map = zinfo->map; + + assert((NCJsort(jattrs) == NCJ_DICT)); + assert((NCJsort(jtypes) == NCJ_DICT)); + + if(container->sort == NCGRP) { + NC_GRP_INFO_T* grp = (NC_GRP_INFO_T*)container; + /* Get grp's fullpath name */ + if((stat = NCZ_grpkey(grp,&fullpath))) + goto done; + } else { + NC_VAR_INFO_T* var = (NC_VAR_INFO_T*)container; + /* Get var's fullpath name */ + if((stat = NCZ_varkey(var,&fullpath))) + goto done; + } + + /* Construct the path to the .zattrs object */ + if((stat = nczm_concat(fullpath,ZATTRS,&akey))) + goto done; + + /* Always write as V2 */ + + { + NCjson* k = NULL; + NCjson* v = NULL; + /* remove any previous version */ + if(!NCJremove(jattrs,NCZ_V2_ATTRS,1,&k,&v)) { + NCJreclaim(k); NCJreclaim(v); + } + } + + if(!(zinfo->controls.flags & FLAG_PUREZARR)) { + /* Insert the jtypes into the set of attributes */ + if((stat = NCJinsert(jattrs,NCZ_V2_ATTRS,jtypes))) goto done; + } + + /* Upload the .zattrs object */ + if((stat=NCZ_uploadjson(map,tkey,jattrs))) + goto done; + +done: + if(stat) { + NCJreclaim(jattrs); + NCJreclaim(jtypes); + } + nullfree(fullpath); + nullfree(akey); + nullfree(tkey); + return stat; +} +#endif + + + diff --git a/libnczarr/zarr.h b/libnczarr/zarr.h index 9f78e1929..6957bdd14 100644 --- a/libnczarr/zarr.h +++ b/libnczarr/zarr.h @@ -15,6 +15,16 @@ struct ChunkKey; struct S3credentials; +/* Intermediate results */ +struct 
ZCVT { + signed long long int64v; + unsigned long long uint64v; + double float64v; + char* strv; /* null terminated utf-8 */ +}; + +#define zcvt_empty {0,0,0.0,NULL} + /* zarr.c */ EXTERNL int ncz_create_dataset(NC_FILE_INFO_T*, NC_GRP_INFO_T*, const char** controls); EXTERNL int ncz_open_dataset(NC_FILE_INFO_T*, const char** controls); @@ -31,8 +41,9 @@ EXTERNL int ncz_unload_jatts(NCZ_FILE_INFO_T*, NC_OBJ* container, NCjson* jattrs EXTERNL int ncz_close_file(NC_FILE_INFO_T* file, int abort); /* zcvt.c */ -EXTERNL int NCZ_convert1(NCjson* jsrc, nc_type, unsigned char* memory0); -EXTERNL int NCZ_stringconvert1(nc_type typid, size_t len, char* src, NCjson* jvalue); +EXTERNL int NCZ_json2cvt(NCjson* jsrc, struct ZCVT* zcvt, nc_type* typeidp); +EXTERNL int NCZ_convert1(NCjson* jsrc, nc_type, NCbytes*); +EXTERNL int NCZ_stringconvert1(nc_type typid, char* src, NCjson* jvalue); EXTERNL int NCZ_stringconvert(nc_type typid, size_t len, void* data0, NCjson** jdatap); /* zsync.c */ @@ -53,9 +64,11 @@ EXTERNL int NCZ_dimkey(const NC_DIM_INFO_T* dim, char** pathp); EXTERNL int ncz_splitkey(const char* path, NClist* segments); EXTERNL int NCZ_readdict(NCZMAP* zmap, const char* key, NCjson** jsonp); EXTERNL int NCZ_readarray(NCZMAP* zmap, const char* key, NCjson** jsonp); -EXTERNL int ncz_zarr_type_name(nc_type nctype, int little, const char** znamep); -EXTERNL int ncz_nctype2typeinfo(const char* snctype, nc_type* nctypep); -EXTERNL int ncz_dtype2typeinfo(const char* dtype, nc_type* nctypep, int* endianness); +EXTERNL int ncz_nctypedecode(const char* snctype, nc_type* nctypep); +EXTERNL int ncz_nctype2dtype(nc_type nctype, int endianness, int purezarr,int len, char** dnamep); +EXTERNL int ncz_dtype2nctype(const char* dtype, nc_type typehint, int purezarr, nc_type* nctypep, int* endianp, int* typelenp); +EXTERNL int NCZ_inferattrtype(NCjson* value, nc_type typehint, nc_type* typeidp); +EXTERNL int NCZ_inferinttype(unsigned long long u64, int negative); EXTERNL int 
ncz_fill_value_sort(nc_type nctype, int*); EXTERNL int NCZ_createobject(NCZMAP* zmap, const char* key, size64_t size); EXTERNL int NCZ_uploadjson(NCZMAP* zmap, const char* key, NCjson* json); @@ -73,6 +86,11 @@ EXTERNL int NCZ_ischunkname(const char* name,char dimsep); EXTERNL char* NCZ_chunkpath(struct ChunkKey key); EXTERNL int NCZ_reclaim_fill_value(NC_VAR_INFO_T* var); EXTERNL int NCZ_copy_fill_value(NC_VAR_INFO_T* var, void** dstp); +EXTERNL int NCZ_get_maxstrlen(NC_OBJ* obj); +EXTERNL int NCZ_fixed2char(const void* fixed, char** charp, size_t count, int maxstrlen); +EXTERNL int NCZ_char2fixed(const char** charp, void* fixed, size_t count, int maxstrlen); +EXTERNL int NCZ_copy_data(NC_FILE_INFO_T* file, NC_TYPE_INFO_T* xtype, const void* memory, size_t count, int nofill, void* copy); +EXTERNL int NCZ_iscomplexjson(NCjson* value, nc_type typehint); /* zwalk.c */ EXTERNL int NCZ_read_chunk(int ncid, int varid, size64_t* zindices, void* chunkdata); diff --git a/libnczarr/zattr.c b/libnczarr/zattr.c index 8cd4cfd7a..c3f890d74 100644 --- a/libnczarr/zattr.c +++ b/libnczarr/zattr.c @@ -68,9 +68,11 @@ ncz_getattlist(NC_GRP_INFO_T *grp, int varid, NC_VAR_INFO_T **varp, NCindex **at } /** - * @internal Get one of three special attributes, NCPROPS, - * ISNETCDF4ATT, and SUPERBLOCKATT. These atts are not all really in - * the file, they are constructed on the fly. + * @internal Get one of the special attributes: + * See the reserved attribute table in libsrc4/nc4internal.c. + * The special attributes are the ones marked with NAMEONLYFLAG. + * For example: NCPROPS, ISNETCDF4ATT, and SUPERBLOCKATT, and CODECS. + * These atts are not all really in the file, they are constructed on the fly. * * @param h5 Pointer to ZARR file info struct. * @param var Pointer to var info struct; NULL signals global. 
@@ -323,8 +325,9 @@ NCZ_del_att(int ncid, int varid, const char *name) return NC_ENOTATT; /* Reclaim the content of the attribute */ - if(att->data) + if(att->data) { if((retval = nc_reclaim_data_all(ncid,att->nc_typeid,att->data,att->len))) return retval; + } att->data = NULL; att->len = 0; @@ -426,7 +429,7 @@ ncz_put_att(NC_GRP_INFO_T* grp, int varid, const char *name, nc_type file_type, size_t len, const void *data, nc_type mem_type, int force) { NC* nc; - NC_FILE_INFO_T *h5; + NC_FILE_INFO_T *h5 = NULL; NC_VAR_INFO_T *var = NULL; NCindex* attlist = NULL; NC_ATT_INFO_T* att; @@ -575,7 +578,7 @@ ncz_put_att(NC_GRP_INFO_T* grp, int varid, const char *name, nc_type file_type, } /* If this is the _FillValue attribute, then we will also have to - * copy the value to the fill_vlue pointer of the NC_VAR_INFO_T + * copy the value to the fill_value pointer of the NC_VAR_INFO_T * struct for this var. (But ignore a global _FillValue * attribute). Also kill the cache fillchunk as no longer valid */ if (!strcmp(att->hdr.name, _FillValue) && varid != NC_GLOBAL) @@ -670,6 +673,23 @@ ncz_put_att(NC_GRP_INFO_T* grp, int varid, const char *name, nc_type file_type, att->data = copy; copy = NULL; } } + + /* If this is a maxstrlen attribute, then we will also have to + * sync the value to NCZ_VAR_INFO_T or NCZ_FILE_INFO_T structure */ + { + if(strcmp(att->hdr.name,NC_NCZARR_DEFAULT_MAXSTRLEN_ATTR)==0 && varid == NC_GLOBAL && len == 1) { + NCZ_FILE_INFO_T* zfile = (NCZ_FILE_INFO_T*)h5->format_file_info; + if((retval = nc4_convert_type(att->data, &zfile->default_maxstrlen, file_type, NC_INT, + len, &range_error, NULL, NC_CLASSIC_MODEL, NC_NOQUANTIZE, 0))) + BAIL(retval); + } else if(strcmp(att->hdr.name,NC_NCZARR_MAXSTRLEN_ATTR)==0 && varid != NC_GLOBAL && len == 1) { + NCZ_VAR_INFO_T* zvar = (NCZ_VAR_INFO_T*)var->format_var_info; + if((retval = nc4_convert_type(att->data, &zvar->maxstrlen, file_type, NC_INT, + len, &range_error, NULL, NC_CLASSIC_MODEL, NC_NOQUANTIZE, 0))) + 
BAIL(retval); + } + } + att->dirty = NC_TRUE; att->created = NC_FALSE; att->len = len; @@ -994,14 +1014,19 @@ ncz_makeattr(NC_OBJ* container, NCindex* attlist, const char* name, nc_type type NCZ_ATT_INFO_T* zatt = NULL; void* clone = NULL; size_t typesize, clonesize; + int ncid; + NC* nc = NULL; NC_GRP_INFO_T* grp = (container->sort == NCGRP ? (NC_GRP_INFO_T*)container : ((NC_VAR_INFO_T*)container)->container); + nc = grp->nc4_info->controller; + ncid = nc->ext_ncid | grp->hdr.id; + /* Duplicate the values */ if ((stat = nc4_get_typelen_mem(grp->nc4_info, typeid, &typesize))) goto done; clonesize = len*typesize; if((clone = malloc(clonesize))==NULL) {stat = NC_ENOMEM; goto done;} - memcpy(clone,values,clonesize); + if((stat = nc_copy_data(ncid, typeid, values, len, clone))) goto done; if((stat=nc4_att_list_add(attlist,name,&att))) goto done; diff --git a/libnczarr/zcache.h b/libnczarr/zcache.h index f5f7362dd..2ef0fe8ad 100644 --- a/libnczarr/zcache.h +++ b/libnczarr/zcache.h @@ -32,6 +32,7 @@ typedef struct NCZCacheEntry { } key; size64_t hashkey; int isfiltered; /* 1=>data contains filtered data else real data */ + int isfixedstring; /* 1 => data contains the fixed strings, 0 => data contains pointers to strings */ size64_t size; /* |data| */ void* data; /* contains either filtered or real data */ } NCZCacheEntry; diff --git a/libnczarr/zclose.c b/libnczarr/zclose.c index f1f3354b1..cc8b4d006 100644 --- a/libnczarr/zclose.c +++ b/libnczarr/zclose.c @@ -172,10 +172,10 @@ zclose_vars(NC_GRP_INFO_T* grp) var->filters = NULL; #endif /* Reclaim the type */ - (void)zclose_type(var->type_info); - NCZ_free_chunk_cache(zvar->cache); + if(var->type_info) (void)zclose_type(var->type_info); + if(zvar->cache) NCZ_free_chunk_cache(zvar->cache); /* reclaim xarray */ - nclistfreeall(zvar->xarray); + if(zvar->xarray) nclistfreeall(zvar->xarray); nullfree(zvar); var->format_var_info = NULL; /* avoid memory errors */ } @@ -223,13 +223,9 @@ static int zclose_type(NC_TYPE_INFO_T* 
type) { int stat = NC_NOERR; - NCZ_TYPE_INFO_T* ztype; assert(type && type->format_type_info != NULL); - /* Get Zarr-specific type info. */ - ztype = type->format_type_info; - nullfree(ztype); - type->format_type_info = NULL; /* avoid memory errors */ + nullfree(type->format_type_info); return stat; } diff --git a/libnczarr/zcvt.c b/libnczarr/zcvt.c index 4b59b881d..6fd781f34 100644 --- a/libnczarr/zcvt.c +++ b/libnczarr/zcvt.c @@ -15,51 +15,121 @@ Code taken directly from libdap4/d4cvt.c */ -/* Intermediate results */ -struct ZCVT { - signed long long int64v; - unsigned long long uint64v; - double float64v; +static const int ncz_type_size[NC_MAX_ATOMIC_TYPE+1] = { +0, /*NC_NAT*/ +sizeof(char), /*NC_BYTE*/ +sizeof(char), /*NC_CHAR*/ +sizeof(short), /*NC_SHORT*/ +sizeof(int), /*NC_INT*/ +sizeof(float), /*NC_FLOAT*/ +sizeof(double), /*NC_DOUBLE*/ +sizeof(unsigned char), /*NC_UBYTE*/ +sizeof(unsigned short), /*NC_USHORT*/ +sizeof(unsigned int), /*NC_UINT*/ +sizeof(long long), /*NC_INT64*/ +sizeof(unsigned long long), /*NC_UINT64*/ +sizeof(char *), /*NC_STRING*/ }; /* Forward */ static int typeid2jtype(nc_type typeid); -/* Convert an NCJ_STRING to a memory equivalent value of specified dsttype */ +#if 0 +/* Convert a JSON value to a struct ZCVT value and also return the type */ int -NCZ_convert1(NCjson* jsrc, nc_type dsttype, unsigned char* memory) +NCZ_string2cvt(char* src, nc_type srctype, struct ZCVT* zcvt, nc_type* typeidp) { int stat = NC_NOERR; - nc_type srctype; - struct ZCVT zcvt; - int outofrange = 0; + nc_type dsttype = NC_NAT; - /* Convert the incoming jsrc string to a restricted set of values */ + assert(zcvt); + + /* Convert to a restricted set of values */ + switch (srctype) { + case NC_BYTE: { + zcvt->int64v = (signed long long)(*((signed char*)src)); + dsttype = NC_INT64; + } break; + case NC_UBYTE: { + zcvt->uint64v = (unsigned long long)(*((unsigned char*)src)); + dsttype = NC_UINT64; + } break; + case NC_SHORT: { + zcvt->int64v = (signed long 
long)(*((signed short*)src)); + dsttype = NC_INT64; + } break; + case NC_USHORT: { + zcvt->uint64v = (unsigned long long)(*((unsigned short*)src)); + dsttype = NC_UINT64; + } break; + case NC_INT: { + zcvt->int64v = (signed long long)(*((signed int*)src)); + dsttype = NC_INT64; + } break; + case NC_UINT: { + zcvt->uint64v = (unsigned long long)(*((unsigned int*)src)); + dsttype = NC_UINT64; + } break; + case NC_INT64: { + zcvt->int64v = (signed long long)(*((signed long long*)src)); + dsttype = NC_INT64; + } break; + case NC_UINT64: { + zcvt->uint64v = (unsigned long long)(*((unsigned long long*)src)); + dsttype = NC_UINT64; + } break; + case NC_FLOAT: { + zcvt->float64v = (double)(*((float*)src)); + dsttype = NC_DOUBLE; + } break; + case NC_DOUBLE: { + dsttype = NC_DOUBLE; + zcvt->float64v= (double)(*((double*)src)); + } break; + case NC_STRING: { + dsttype = NC_STRING; + zcvt->strv= *((char**)src); + } break; + default: stat = NC_EINTERNAL; goto done; + } + if(typeidp) *typeidp = dsttype; +done: + return stat; +} +#endif + +/* Warning: not free returned zcvt.strv; it may point into a string in jsrc */ +int +NCZ_json2cvt(NCjson* jsrc, struct ZCVT* zcvt, nc_type* typeidp) +{ + int stat = NC_NOERR; + nc_type srctype = NC_NAT; + + /* Convert the incoming jsrc to a restricted set of values */ switch (NCJsort(jsrc)) { case NCJ_INT: /* convert to (u)int64 */ if(NCJstring(jsrc)[0] == '-') { - if(sscanf(NCJstring(jsrc),"%lld",&zcvt.int64v) != 1) + if(sscanf(NCJstring(jsrc),"%lld",&zcvt->int64v) != 1) {stat = NC_EINVAL; goto done;} srctype = NC_INT64; } else { - if(sscanf(NCJstring(jsrc),"%llu",&zcvt.uint64v) != 1) + if(sscanf(NCJstring(jsrc),"%llu",&zcvt->uint64v) != 1) {stat = NC_EINVAL; goto done;} srctype = NC_UINT64; } break; - case NCJ_STRING: case NCJ_DOUBLE: /* Capture nan and infinity values */ if(strcasecmp(NCJstring(jsrc),"nan")==0) - zcvt.float64v = NAN; + zcvt->float64v = NAN; else if(strcasecmp(NCJstring(jsrc),"-nan")==0) - zcvt.float64v = - NAN; + 
zcvt->float64v = - NAN; else if(strcasecmp(NCJstring(jsrc),"infinity")==0) - zcvt.float64v = INFINITY; + zcvt->float64v = INFINITY; else if(strcasecmp(NCJstring(jsrc),"-infinity")==0) - zcvt.float64v = (- INFINITY); + zcvt->float64v = (- INFINITY); else { - if(sscanf(NCJstring(jsrc),"%lg",&zcvt.float64v) != 1) + if(sscanf(NCJstring(jsrc),"%lg",&zcvt->float64v) != 1) {stat = NC_EINVAL; goto done;} } srctype = NC_DOUBLE; @@ -67,169 +137,234 @@ NCZ_convert1(NCjson* jsrc, nc_type dsttype, unsigned char* memory) case NCJ_BOOLEAN: srctype = NC_UINT64; if(strcasecmp(NCJstring(jsrc),"false")==0) - zcvt.uint64v = 0; + zcvt->uint64v = 0; else - zcvt.uint64v = 1; + zcvt->uint64v = 1; + break; + case NCJ_STRING: + srctype = NC_STRING; + zcvt->strv = NCJstring(jsrc); break; default: stat = NC_EINTERNAL; goto done; } + if(typeidp) *typeidp = srctype; +done: + return stat; +} + +/* Convert a singleton NCjson value to a memory equivalent value of specified dsttype; */ +int +NCZ_convert1(NCjson* jsrc, nc_type dsttype, NCbytes* buf) +{ + int stat = NC_NOERR; + nc_type srctype; + struct ZCVT zcvt = zcvt_empty; + int outofrange = 0; + size_t len = 0; + + assert(dsttype != NC_NAT && dsttype <= NC_MAX_ATOMIC_TYPE && buf); + + switch (NCJsort(jsrc)) { + case NCJ_STRING: case NCJ_INT: case NCJ_DOUBLE: case NCJ_BOOLEAN: + if((stat = NCZ_json2cvt(jsrc,&zcvt,&srctype))) goto done; + break; + default: stat = NC_EINVAL; goto done; /* Illegal JSON */ + } + + len = ncz_type_size[dsttype]; /* may change later */ + /* Now, do the down conversion */ switch (dsttype) { case NC_BYTE: { - signed char* p = (signed char*)memory; + signed char c = 0; switch (srctype) { case NC_DOUBLE: zcvt.int64v = (long long)zcvt.float64v; /* Convert to int64 */ /* fall thru */ case NC_INT64: if(zcvt.int64v < NC_MIN_BYTE || zcvt.int64v > NC_MAX_BYTE) outofrange = 1; - *p = (signed char)zcvt.int64v; + c = (signed char)zcvt.int64v; + ncbytesappend(buf,(char)c); break; case NC_UINT64: if(zcvt.uint64v > NC_MAX_BYTE)
outofrange = 1; - *p = (signed char)zcvt.uint64v; + c = (signed char)zcvt.uint64v; + ncbytesappend(buf,(char)c); break; } } break; case NC_UBYTE: { - unsigned char* p = (unsigned char*)memory; + unsigned char c = 0; switch (srctype) { case NC_DOUBLE: zcvt.int64v = (long long)zcvt.float64v; /* Convert to int64 */ /* fall thru */ case NC_INT64: - if(zcvt.int64v < 0 || zcvt.int64v > NC_MAX_BYTE) outofrange = 1; - *p = (unsigned char)zcvt.int64v; + if(zcvt.int64v < 0 || zcvt.int64v > NC_MAX_UBYTE) outofrange = 1; /* unsigned destination: upper bound is NC_MAX_UBYTE */ + c = (unsigned char)zcvt.int64v; + ncbytesappend(buf,(char)c); break; case NC_UINT64: if(zcvt.uint64v > NC_MAX_UBYTE) outofrange = 1; - *p = (unsigned char)zcvt.uint64v; + c = (unsigned char)zcvt.uint64v; + ncbytesappend(buf,(char)c); break; } } break; case NC_SHORT: { - signed short* p = (signed short*)memory; + signed short s = 0; switch (srctype) { case NC_DOUBLE: zcvt.int64v = (long long)zcvt.float64v; /* Convert to int64 */ /* fall thru */ case NC_INT64: if(zcvt.int64v < NC_MIN_SHORT || zcvt.int64v > NC_MAX_SHORT) outofrange = 1; - *p = (signed short)zcvt.int64v; + s = (signed short)zcvt.int64v; + ncbytesappendn(buf,(char*)&s,sizeof(s)); break; case NC_UINT64: if(zcvt.uint64v > NC_MAX_SHORT) outofrange = 1; - *p = (signed short)zcvt.uint64v; + s = (signed short)zcvt.uint64v; + ncbytesappendn(buf,(char*)&s,sizeof(s)); break; } } break; case NC_USHORT: { - unsigned short* p = (unsigned short*)memory; + unsigned short s = 0; switch (srctype) { case NC_DOUBLE: zcvt.int64v = (long long)zcvt.float64v; /* Convert to int64 */ /* fall thru */ case NC_INT64: if(zcvt.int64v < 0 || zcvt.int64v > NC_MAX_USHORT) outofrange = 1; - *p = (unsigned short)zcvt.int64v; + s = (unsigned short)zcvt.int64v; + ncbytesappendn(buf,(char*)&s,sizeof(s)); break; case NC_UINT64: if(zcvt.uint64v > NC_MAX_USHORT) outofrange = 1; - *p = (unsigned short)zcvt.uint64v; + s = (unsigned short)zcvt.uint64v; + ncbytesappendn(buf,(char*)&s,sizeof(s)); break; } } break; case NC_INT: { - signed int* p = (signed int*)memory; + signed int ii = 0; switch (srctype) {
case NC_DOUBLE: zcvt.int64v = (long long)zcvt.float64v; /* Convert to int64 */ /* fall thru */ case NC_INT64: if(zcvt.int64v < NC_MIN_INT || zcvt.int64v > NC_MAX_INT) outofrange = 1; - *p = (signed int)zcvt.int64v; + ii = (signed int)zcvt.int64v; + ncbytesappendn(buf,(char*)&ii,sizeof(ii)); break; case NC_UINT64: if(zcvt.uint64v > NC_MAX_INT) outofrange = 1; - *p = (signed int)zcvt.uint64v; + ii = (signed int)zcvt.uint64v; + ncbytesappendn(buf,(char*)&ii,sizeof(ii)); break; } } break; case NC_UINT: { - unsigned int* p = (unsigned int*)memory; + unsigned int ii = 0; switch (srctype) { case NC_DOUBLE: zcvt.int64v = (long long)zcvt.float64v; /* Convert to int64 */ /* fall thru */ case NC_INT64: if(zcvt.int64v < 0 || zcvt.int64v > NC_MAX_UINT) outofrange = 1; - *p = (unsigned int)zcvt.int64v; + ii = (unsigned int)zcvt.int64v; + ncbytesappendn(buf,(char*)&ii,sizeof(ii)); break; case NC_UINT64: if(zcvt.uint64v > NC_MAX_UINT) outofrange = 1; - *p = (unsigned int)zcvt.uint64v; + ii = (unsigned int)zcvt.uint64v; + ncbytesappendn(buf,(char*)&ii,sizeof(ii)); break; } } break; case NC_INT64: { - signed long long* p = (signed long long*)memory; + signed long long ll = 0; switch (srctype) { case NC_DOUBLE: zcvt.int64v = (long long)zcvt.float64v; /* Convert to int64 */ /* fall thru */ case NC_INT64: - *p = (signed long long)zcvt.int64v; + ll = (signed long long)zcvt.int64v; + ncbytesappendn(buf,(char*)&ll,sizeof(ll)); break; case NC_UINT64: if(zcvt.uint64v > NC_MAX_INT64) outofrange = 1; - *p = (signed long long)zcvt.uint64v; + ll = (signed long long)zcvt.uint64v; + ncbytesappendn(buf,(char*)&ll,sizeof(ll)); break; } } break; case NC_UINT64: { - unsigned long long* p = (unsigned long long*)memory; + unsigned long long ll = 0; switch (srctype) { case NC_DOUBLE: zcvt.int64v = (signed long long)zcvt.float64v; /* fall thru */ case NC_INT64: if(zcvt.int64v < 0) outofrange = 1; - *p = (unsigned long long)zcvt.int64v; + ll = (unsigned long long)zcvt.int64v; +
ncbytesappendn(buf,(char*)&ll,sizeof(ll)); break; case NC_UINT64: - *p = (unsigned long long)zcvt.uint64v; + ll = (unsigned long long)zcvt.uint64v; + ncbytesappendn(buf,(char*)&ll,sizeof(ll)); break; } } break; case NC_FLOAT: { - float* p = (float*)memory; + float f = 0; switch (srctype) { case NC_DOUBLE: - *p = (float)zcvt.float64v; + f = (float)zcvt.float64v; + ncbytesappendn(buf,(char*)&f,sizeof(f)); break; case NC_INT64: - *p = (float)zcvt.int64v; + f = (float)zcvt.int64v; + ncbytesappendn(buf,(char*)&f,sizeof(f)); break; case NC_UINT64: - *p = (float)zcvt.uint64v; + f = (float)zcvt.uint64v; + ncbytesappendn(buf,(char*)&f,sizeof(f)); break; } } break; case NC_DOUBLE: { - double* p = (double*)memory; + double d = 0; switch (srctype) { case NC_DOUBLE: - *p = (double)zcvt.float64v; + d = (double)zcvt.float64v; + ncbytesappendn(buf,(char*)&d,sizeof(d)); break; case NC_INT64: - *p = (double)zcvt.int64v; - break; - case NC_UINT64: - *p = (double)zcvt.uint64v; + d = (double)zcvt.int64v; + ncbytesappendn(buf,(char*)&d,sizeof(d)); + break; /* do not fall through into the NC_UINT64 arm */ + case NC_UINT64: + d = (double)zcvt.uint64v; + ncbytesappendn(buf,(char*)&d,sizeof(d)); break; } } break; + case NC_STRING: { + char* scopy = NULL; + if(srctype != NC_STRING) {stat = NC_EINVAL; goto done;} + /* Need to append the pointer and not what it points to */ + scopy = nulldup(zcvt.strv); + ncbytesappendn(buf,(void*)&scopy,sizeof(scopy)); + scopy = NULL; + } break; + case NC_CHAR: { + if(srctype != NC_STRING) {stat = NC_EINVAL; goto done;} + len = strlen(zcvt.strv); + ncbytesappendn(buf,zcvt.strv,len); + } break; default: stat = NC_EINTERNAL; goto done; } @@ -240,14 +374,15 @@ done: /* Convert a memory value to a JSON string value */ int -NCZ_stringconvert1(nc_type srctype, size_t len, char* src, NCjson* jvalue) +NCZ_stringconvert1(nc_type srctype, char* src, NCjson* jvalue) { int stat = NC_NOERR; struct ZCVT zcvt; nc_type dsttype = NC_NAT; char s[1024]; + char* p = NULL; - assert(srctype >= NC_NAT && srctype != NC_CHAR && srctype <
NC_STRING); + assert(srctype >= NC_NAT && srctype != NC_CHAR && srctype <= NC_STRING); /* Convert to a restricted set of values */ switch (srctype) { case NC_BYTE: { @@ -290,6 +425,10 @@ NCZ_stringconvert1(nc_type srctype, size_t len, char* src, NCjson* jvalue) dsttype = NC_DOUBLE; zcvt.float64v= (double)(*((double*)src)); } break; + case NC_STRING: { + dsttype = NC_STRING; + zcvt.strv= *((char**)src); + } break; default: stat = NC_EINTERNAL; goto done; } @@ -326,13 +465,21 @@ NCZ_stringconvert1(nc_type srctype, size_t len, char* src, NCjson* jvalue) } #endif } break; + case NC_STRING: { + p = nulldup(zcvt.strv); + } break; default: stat = NC_EINTERNAL; goto done; } - NCJsetstring(jvalue,strdup(s)); + if(p == NULL) + p = strdup(s); + NCJsetstring(jvalue,p); + p = NULL; done: + nullfree(p); return stat; } +/* Convert arbitrary netcdf attribute vector to equivalent JSON */ int NCZ_stringconvert(nc_type typeid, size_t len, void* data0, NCjson** jdatap) { @@ -352,17 +499,19 @@ NCZ_stringconvert(nc_type typeid, size_t len, void* data0, NCjson** jdatap) /* Handle char type specially */ if(typeid == NC_CHAR) { - /* Create a string valued json object */ - if((stat = NCJnewstringn(NCJ_STRING,len,src,&jdata))) - goto done; + /* Apply the JSON write convention */ + if((stat = NCJparsen(len,src,0,&jdata))) { /* !parseable */ + /* Create a string valued json object */ + if((stat = NCJnewstringn(NCJ_STRING,len,src,&jdata))) goto done; + } } else if(len == 1) { /* create singleton */ if((stat = NCJnew(jtype,&jdata))) goto done; - if((stat = NCZ_stringconvert1(typeid, len, src, jdata))) goto done; + if((stat = NCZ_stringconvert1(typeid, src, jdata))) goto done; } else { /* len > 1 create array of values */ if((stat = NCJnew(NCJ_ARRAY,&jdata))) goto done; for(i=0;i<len;i++) { if((stat = NCJnew(jtype,&jvalue))) goto done; - if((stat = NCZ_stringconvert1(typeid, len, src, jvalue))) goto done; + if((stat = NCZ_stringconvert1(typeid, src, jvalue))) goto done; NCJappend(jdata,jvalue); 
jvalue = NULL; src += typelen; @@ -388,6 +537,7 @@ typeid2jtype(nc_type typeid) case NC_DOUBLE: return NCJ_DOUBLE; case NC_CHAR: + case NC_STRING: return NCJ_STRING; default: break; } diff --git a/libnczarr/zdebug.h b/libnczarr/zdebug.h index 7874524f2..36d964ef9 100644 --- a/libnczarr/zdebug.h +++ b/libnczarr/zdebug.h @@ -8,8 +8,8 @@ #undef ZDEBUG /* general debug */ #undef ZDEBUG1 /* detailed debug */ -#undef ZCATCH /* Warning: significant performance impact */ -#undef ZTRACING /* Warning: significant performance impact */ +#define ZCATCH /* Warning: significant performance impact */ +#define ZTRACING /* Warning: significant performance impact */ #include "ncexternl.h" #include "nclog.h" diff --git a/libnczarr/zfile.c b/libnczarr/zfile.c index 9071011b5..a4233bd17 100644 --- a/libnczarr/zfile.c +++ b/libnczarr/zfile.c @@ -116,17 +116,6 @@ NCZ_enddef(NC_FILE_INFO_T* h5) assert(var); var->written_to = NC_TRUE; /* mark it written */ var->created = 1; -#if 0 - /* set the fill value and _FillValue attribute */ - if((stat = NCZ_ensure_fill_value(var))) goto done; /* ensure var->fill_value is set */ - assert(var->no_fill || var->fill_value != NULL); - /* rebuild the fill chunk */ - if((stat = NCZ_adjust_var_cache(var))) goto done; -#ifdef ENABLE_NCZARR_FILTERS - /* Build the filter working parameters for any filters */ - if((stat = NCZ_filter_setup(var))) goto done; -#endif -#endif /*0|1*/ } } if((stat = ncz_enddef_netcdf4_file(h5))) goto done; diff --git a/libnczarr/zinternal.h b/libnczarr/zinternal.h index 0e3cec55a..3672c340f 100644 --- a/libnczarr/zinternal.h +++ b/libnczarr/zinternal.h @@ -22,9 +22,6 @@ #define NCZ_CHUNKSIZE_FACTOR (10) #define NCZ_MIN_CHUNK_SIZE (2) -/* An attribute in the ZARR root group of this name means that the - * file must follow strict netCDF classic format rules. 
*/ -#define NCZ_NC3_STRICT_ATT_NAME "_nc3_strict" /**************************************************/ /* Constants */ @@ -62,43 +59,54 @@ /* V2 Reserved Attributes */ /* Inserted into /.zgroup -_NCZARR_SUPERBLOCK: {"version": "2.0.0"} +_nczarr_superblock: {"version": "2.0.0"} Inserted into any .zgroup -"_NCZARR_GROUP": "{ +"_nczarr_group": "{ \"dimensions\": {\"d1\": \"1\", \"d2\": \"1\",...} \"variables\": [\"v1\", \"v2\", ...] \"groups\": [\"g1\", \"g2\", ...] }" Inserted into any .zarray -"_NCZARR_ARRAY": "{ +"_nczarr_array": "{ \"dimensions\": [\"/g1/g2/d1\", \"/d2\",...] \"storage\": \"scalar\"|\"contiguous\"|\"compact\"|\"chunked\" }" Inserted into any .zattrs ? or should it go into the container? -"_NCZARR_ATTRS": "{ +"_nczarr_attrs": "{ \"types\": {\"attr1\": \"<i4\", \"attr2\": \"<i1\",...} } ++ ++Note: _nczarr_attrs type include non-standard use of a zarr type "|U1" => NC_CHAR. ++ */ -#define NCZ_V2_SUPERBLOCK "_NCZARR_SUPERBLOCK" -#define NCZ_V2_GROUP "_NCZARR_GROUP" -#define NCZ_V2_ARRAY "_NCZARR_ARRAY" +#define NCZ_V2_SUPERBLOCK "_nczarr_superblock" +#define NCZ_V2_GROUP "_nczarr_group" +#define NCZ_V2_ARRAY "_nczarr_array" #define NCZ_V2_ATTR NC_NCZARR_ATTR +#define NCZ_V2_SUPERBLOCK_UC "_NCZARR_SUPERBLOCK" +#define NCZ_V2_GROUP_UC "_NCZARR_GROUP" +#define NCZ_V2_ARRAY_UC "_NCZARR_ARRAY" +#define NCZ_V2_ATTR_UC NC_NCZARR_ATTR_UC + +#define NCZARRCONTROL "nczarr" #define PUREZARRCONTROL "zarr" #define XARRAYCONTROL "xarray" #define NOXARRAYCONTROL "noxarray" +#define XARRAYSCALAR "_scalar_" #define LEGAL_DIM_SEPARATORS "./" #define DFALT_DIM_SEPARATOR '.' 
+/* Default max string length for fixed length strings */ +#define NCZ_MAXSTR_DEFAULT 64 + #define islegaldimsep(c) ((c) != '\0' && strchr(LEGAL_DIM_SEPARATORS,(c)) != NULL) /* Mnemonics */ -#define ZCLOSE 1 /* this is closeorabort as opposed to enddef */ - -/* Mnemonics */ -#define ZCLOSE 1 /* this is closeorabort as opposed to enddef */ +#define ZCLEAR 0 /* For NCZ_copy_data */ +#define ZCLOSE 1 /* this is closeorabort as opposed to enddef */ /* Useful macro */ #define ncidforx(file,grpid) ((file)->controller->ext_ncid | (grpid)) @@ -146,6 +154,7 @@ typedef struct NCZ_FILE_INFO { # define FLAG_NCZARR_V1 16 NCZM_IMPL mapimpl; } controls; + int default_maxstrlen; /* default max str size for variables of type string */ } NCZ_FILE_INFO_T; /* This is a struct to handle the dim metadata. */ @@ -186,6 +195,7 @@ typedef struct NCZ_VAR_INFO { struct NClist* xarray; /* names from _ARRAY_DIMENSIONS */ char dimension_separator; /* '.' | '/' */ NClist* incompletefilters; + int maxstrlen; /* max length of strings for this variable */ } NCZ_VAR_INFO_T; /* Struct to hold ZARR-specific info for a field. 
*/ diff --git a/libnczarr/zsync.c b/libnczarr/zsync.c index d5cbee966..0b96ab0d2 100644 --- a/libnczarr/zsync.c +++ b/libnczarr/zsync.c @@ -20,10 +20,9 @@ static int ncz_collect_dims(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NCjson** jdimsp); static int ncz_sync_var(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, int isclose); -static int ncz_jsonize_atts(NCindex* attlist, NCjson** jattrsp); static int load_jatts(NCZMAP* map, NC_OBJ* container, int nczarrv1, NCjson** jattrsp, NClist** atypes); -static int zconvert(nc_type typeid, size_t typelen, NCjson* src, void* dst); -static int computeattrinfo(const char* name, NClist* atypes, NCjson* values, +static int zconvert(NCjson* src, nc_type typeid, size_t typelen, int* countp, NCbytes* dst); +static int computeattrinfo(const char* name, NClist* atypes, nc_type typehint, int purezarr, NCjson* values, nc_type* typeidp, size_t* typelenp, size_t* lenp, void** datap); static int parse_group_content(NCjson* jcontent, NClist* dimdefs, NClist* varnames, NClist* subgrps); static int parse_group_content_pure(NCZ_FILE_INFO_T* zinfo, NC_GRP_INFO_T* grp, NClist* varnames, NClist* subgrps); @@ -37,12 +36,10 @@ static int locategroup(NC_FILE_INFO_T* file, size_t nsegs, NClist* segments, NC_ static int createdim(NC_FILE_INFO_T* file, const char* name, size64_t dimlen, NC_DIM_INFO_T** dimp); static int parsedimrefs(NC_FILE_INFO_T*, NClist* dimnames, size64_t* shape, NC_DIM_INFO_T** dims, int create); static int decodeints(NCjson* jshape, size64_t* shapes); -static int computeattrdata(nc_type* typeidp, NCjson* values, size_t* typelenp, size_t* lenp, void** datap); -static int inferattrtype(NCjson* values, nc_type* typeidp); -static int mininttype(unsigned long long u64, int negative); +static int computeattrdata(nc_type typehint, nc_type* typeidp, NCjson* values, size_t* typelenp, size_t* lenp, void** datap); static int computedimrefs(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, int purezarr, int xarray, int ndims, NClist* dimnames, size64_t* 
shapes, NC_DIM_INFO_T** dims); -static int read_dict(NCjson* jdict, NCjson** jtextp); -static int write_dict(size_t len, const void* data, NCjson** jsonp); +static int json_convention_read(NCjson* jdict, NCjson** jtextp); +static int jtypes2atypes(NCjson* jtypes, NClist* atypes); /**************************************************/ /**************************************************/ @@ -97,6 +94,7 @@ ncz_collect_dims(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NCjson** jdimsp) NCjson* jdims = NULL; LOG((3, "%s: ", __func__)); + ZTRACE(3,"file=%s grp=%s",file->controller->path,grp->hdr.name); NCJnew(NCJ_DICT,&jdims); for(i=0; i<ncindexsize(grp->dim); i++) { @@ -109,7 +107,7 @@ ncz_collect_dims(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NCjson** jdimsp) if(jdimsp) {*jdimsp = jdims; jdims = NULL;} done: NCJreclaim(jdims); - return THROW(stat); + return ZUNTRACE(THROW(stat)); } /** @@ -140,6 +138,7 @@ ncz_sync_grp(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, int isclose) NCjson* jtmp = NULL; LOG((3, "%s: dims: %s", __func__, key)); + ZTRACE(3,"file=%s grp=%s isclose=%d",file->controller->path,grp->hdr.name,isclose); zinfo = file->format_file_info; map = zinfo->map; @@ -169,7 +168,7 @@ ncz_sync_grp(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, int isclose) NC_GRP_INFO_T* g = (NC_GRP_INFO_T*)ncindexith(grp->children,i); if((stat = NCJaddstring(jsubgrps,NCJ_STRING,g->hdr.name))) goto done; } - /* Create the "_NCZARR_GROUP" dict */ + /* Create the "_nczarr_group" dict */ if((stat = NCJnew(NCJ_DICT,&json))) goto done; /* Insert the various dicts and arrays */ @@ -241,7 +240,7 @@ done: NCJreclaim(jsubgrps); nullfree(fullpath); nullfree(key); - return THROW(stat); + return ZUNTRACE(THROW(stat)); } /** @@ -270,6 +269,8 @@ ncz_sync_var_meta(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, int isclose) NCjson* jdimrefs = NULL; NCjson* jtmp = NULL; NCjson* jfill = NULL; + char* dtypename = NULL; + int purezarr = 0; size64_t shape[NC_MAX_VAR_DIMS]; NCZ_VAR_INFO_T* zvar = var->format_var_info; 
#ifdef ENABLE_NCZARR_FILTERS @@ -277,10 +278,13 @@ ncz_sync_var_meta(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, int isclose) NCjson* jfilter = NULL; #endif + ZTRACE(3,"file=%s var=%s isclose=%d",file->controller->path,var->hdr.name,isclose); + zinfo = file->format_file_info; map = zinfo->map; -#if 1 + purezarr = (zinfo->controls.flags & FLAG_PUREZARR)?1:0; + /* Make sure that everything is established */ /* ensure the fill value */ if((stat = NCZ_ensure_fill_value(var))) goto done; /* ensure var->fill_value is set */ @@ -293,7 +297,6 @@ ncz_sync_var_meta(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, int isclose) /* Build the filter working parameters for any filters */ if((stat = NCZ_filter_setup(var))) goto done; #endif -#endif /*0|1*/ /* Construct var path */ if((stat = NCZ_varkey(var,&fullpath))) @@ -321,7 +324,9 @@ ncz_sync_var_meta(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, int isclose) /* Integer list defining the length of each dimension of the array.*/ /* Create the list */ if((stat = NCJnew(NCJ_ARRAY,&jtmp))) goto done; - for(i=0;i<var->ndims+zvar->scalar;i++) { + if(zvar->scalar) { + NCJaddstring(jtmp,NCJ_INT,"1"); + } else for(i=0;i<var->ndims;i++) { snprintf(number,sizeof(number),"%llu",shape[i]); NCJaddstring(jtmp,NCJ_INT,number); } @@ -332,18 +337,12 @@ ncz_sync_var_meta(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, int isclose) /* A string or list defining a valid data type for the array. 
*/ if((stat = NCJaddstring(jvar,NCJ_STRING,"dtype"))) goto done; { /* Add the type name */ - const char* dtypename; int endianness = var->type_info->endianness; - int islittle; - switch (endianness) { - case NC_ENDIAN_LITTLE: islittle = 1; break; - case NC_ENDIAN_BIG: islittle = 0; break; - case NC_ENDIAN_NATIVE: abort(); /* should never happen */ - } int atomictype = var->type_info->hdr.id; - assert(atomictype > 0 && atomictype <= NC_MAX_ATOMIC_TYPE && atomictype != NC_STRING); - if((stat = ncz_zarr_type_name(atomictype,islittle,&dtypename))) goto done; + assert(atomictype > 0 && atomictype <= NC_MAX_ATOMIC_TYPE); + if((stat = ncz_nctype2dtype(atomictype,endianness,purezarr,NCZ_get_maxstrlen((NC_OBJ*)var),&dtypename))) goto done; if((stat = NCJaddstring(jvar,NCJ_STRING,dtypename))) goto done; + nullfree(dtypename); dtypename = NULL; } /* chunks key */ @@ -356,7 +355,9 @@ ncz_sync_var_meta(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, int isclose) if((stat = NCJaddstring(jvar,NCJ_STRING,"chunks"))) goto done; /* Create the list */ if((stat = NCJnew(NCJ_ARRAY,&jtmp))) goto done; - for(i=0;i<(var->ndims+zvar->scalar);i++) { + if(zvar->scalar) { + NCJaddstring(jtmp,NCJ_INT,"1"); /* one chunk of size 1 */ + } else for(i=0;i<var->ndims;i++) { size64_t len = (var->storage == NC_CONTIGUOUS ? 
shape[i] : var->chunksizes[i]); snprintf(number,sizeof(number),"%lld",len); NCJaddstring(jtmp,NCJ_INT,number); @@ -511,12 +512,13 @@ done: nclistfreeall(dimrefs); nullfree(fullpath); nullfree(key); + nullfree(dtypename); nullfree(dimpath); NCJreclaim(jvar); NCJreclaim(jncvar); NCJreclaim(jtmp); NCJreclaim(jfill); - return THROW(stat); + return ZUNTRACE(THROW(stat)); } /** @@ -534,6 +536,9 @@ ncz_sync_var(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, int isclose) { int stat = NC_NOERR; NCZ_VAR_INFO_T* zvar = var->format_var_info; + + ZTRACE(3,"file=%s var=%s isclose=%d",file->controller->path,var->hdr.name,isclose); + if(isclose) { if((stat = ncz_sync_var_meta(file,var,isclose))) goto done; } @@ -545,7 +550,7 @@ ncz_sync_var(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, int isclose) } done: - return THROW(stat); + return ZUNTRACE(THROW(stat)); } @@ -559,6 +564,8 @@ ncz_write_var(NC_VAR_INFO_T* var) int stat = NC_NOERR; NCZ_VAR_INFO_T* zvar = (NCZ_VAR_INFO_T*)var->format_var_info; + ZTRACE(3,"var=%s",var->hdr.name); + /* Flush the cache */ if(zvar->cache) { if((stat = NCZ_flush_chunk_cache(zvar->cache))) goto done; @@ -590,6 +597,7 @@ ncz_write_var(NC_VAR_INFO_T* var) } } + { /* Iterate over all the chunks to create missing ones */ if((chunkodom = nczodom_new(var->ndims+zvar->scalar,start,stop,stride,stop))==NULL) {stat = NC_ENOMEM; goto done;} @@ -609,13 +617,14 @@ next: nullfree(key); key = NULL; } + } nczodom_free(chunkodom); nullfree(key); } #endif /*FILLONCLOSE*/ done: - return THROW(stat); + return ZUNTRACE(THROW(stat)); } /** @@ -638,52 +647,72 @@ ncz_sync_atts(NC_FILE_INFO_T* file, NC_OBJ* container, NCindex* attlist, int isc NCjson* jdimrefs = NULL; NCjson* jdict = NULL; NCjson* jint = NULL; + NCjson* jdata = NULL; NCZMAP* map = NULL; char* fullpath = NULL; char* key = NULL; char* content = NULL; char* dimpath = NULL; int isxarray = 0; - int isrootgroup = 0; + int inrootgroup = 0; NC_VAR_INFO_T* var = NULL; NC_GRP_INFO_T* grp = NULL; - - LOG((3, "%s", __func__)); - 
- if(container->sort == NCVAR) - var = (NC_VAR_INFO_T*)container; - else if(container->sort == NCGRP) - grp = (NC_GRP_INFO_T*)container; + char* tname = NULL; + int purezarr = 0; + int endianness = (NC_isLittleEndian()?NC_ENDIAN_LITTLE:NC_ENDIAN_BIG); + LOG((3, "%s", __func__)); + ZTRACE(3,"file=%s container=%s |attlist|=%u",file->controller->path,container->name,(unsigned)ncindexsize(attlist)); + + if(container->sort == NCVAR) { + var = (NC_VAR_INFO_T*)container; + if(var->container && var->container->parent == NULL) + inrootgroup = 1; + } else if(container->sort == NCGRP) { + grp = (NC_GRP_INFO_T*)container; + } + zinfo = file->format_file_info; map = zinfo->map; + purezarr = (zinfo->controls.flags & FLAG_PUREZARR)?1:0; if(zinfo->controls.flags & FLAG_XARRAYDIMS) isxarray = 1; - if(container->sort == NCVAR) { - if(var->container && var->container->parent == NULL) - isrootgroup = 1; - } + /* Create the attribute dictionary */ + if((stat = NCJnew(NCJ_DICT,&jatts))) goto done; if(ncindexsize(attlist) > 0) { /* Create the jncattr.types object */ if((stat = NCJnew(NCJ_DICT,&jtypes))) goto done; - /* Walk all the attributes and collect the types by attribute name */ + /* Walk all the attributes convert to json and collect the dtype */ for(i=0;i<ncindexsize(attlist);i++) { NC_ATT_INFO_T* a = (NC_ATT_INFO_T*)ncindexith(attlist,i); - const char* tname = NULL; + size_t typesize = 0; +#if 0 const NC_reservedatt* ra = NC_findreserved(a->hdr.name); /* If reserved and hidden, then ignore */ if(ra && (ra->flags & HIDDENATTRFLAG)) continue; - if(a->nc_typeid >= NC_STRING) - {stat = THROW(NC_ENCZARR); goto done;} - if((stat = ncz_zarr_type_name(a->nc_typeid,1,&tname))) goto done; - if((stat = NCJnewstring(NCJ_STRING,tname,&jtype))) - goto done; - if((stat = NCJinsert(jtypes,a->hdr.name,jtype))) /* add {name: type} */ - goto done; - jtype = NULL; +#endif + if(a->nc_typeid > NC_MAX_ATOMIC_TYPE) + {stat = (THROW(NC_ENCZARR)); goto done;} + if(a->nc_typeid == NC_STRING) + typesize = 
NCZ_get_maxstrlen(container); + else + {if((stat = NC4_inq_atomic_type(a->nc_typeid,NULL,&typesize))) goto done;} + /* Convert to storable json */ + if((stat = NCZ_stringconvert(a->nc_typeid,a->len,a->data,&jdata))) goto done; + if((stat = NCJinsert(jatts,a->hdr.name,jdata))) goto done; + jdata = NULL; + + /* Collect the corresponding dtype */ + { + if((stat = ncz_nctype2dtype(a->nc_typeid,endianness,purezarr,typesize,&tname))) goto done; + if((stat = NCJnewstring(NCJ_STRING,tname,&jtype))) goto done; + nullfree(tname); tname = NULL; + if((stat = NCJinsert(jtypes,a->hdr.name,jtype))) goto done; /* add {name: type} */ + jtype = NULL; + } } } @@ -695,17 +724,16 @@ ncz_sync_atts(NC_FILE_INFO_T* file, NC_OBJ* container, NCindex* attlist, int isc if(stat) goto done; - /* Jsonize the attribute list */ - if((stat = ncz_jsonize_atts(attlist,&jatts))) - goto done; - if(container->sort == NCVAR) { - if(isrootgroup && isxarray) { + if(inrootgroup && isxarray) { int dimsinroot = 1; /* Insert the XARRAY _ARRAY_ATTRIBUTE attribute */ if((stat = NCJnew(NCJ_ARRAY,&jdimrefs))) goto done; - /* Walk the dimensions to check in root group */ + /* Fake the scalar case */ + if(var->ndims == 0) { + NCJaddstring(jdimrefs,NCJ_STRING,XARRAYSCALAR); + } else /* Walk the dimensions and capture the names */ for(i=0;i<var->ndims;i++) { NC_DIM_INFO_T* dim = var->dim[i]; /* Verify that the dimension is in the root group */ @@ -780,69 +808,20 @@ done: nullfree(key); nullfree(content); nullfree(dimpath); + nullfree(tname); NCJreclaim(jatts); NCJreclaim(jtypes); NCJreclaim(jtype); NCJreclaim(jdimrefs); NCJreclaim(jdict); NCJreclaim(jint); - return THROW(stat); + NCJreclaim(jdata); + return ZUNTRACE(THROW(stat)); } /**************************************************/ -/** -@internal Convert a list of attributes to corresponding json. -Note that this does not push to the file. -Also note that attributes of length 1 are stored as singletons, not arrays. -This is to be more consistent with pure zarr. 
-Also implements the JSON dictionary convention. -@param attlist - [in] the attributes to dictify -@param jattrsp - [out] the json'ized att list -@return NC_NOERR -@author Dennis Heimbigner -*/ -static int -ncz_jsonize_atts(NCindex* attlist, NCjson** jattrsp) -{ - int stat = NC_NOERR; - int i, isdict; - NCjson* jattrs = NULL; - NCjson* akey = NULL; - NCjson* jdata = NULL; - - if((stat = NCJnew(NCJ_DICT,&jattrs))) goto done; - - /* Iterate over the attribute list */ - for(i=0;i<ncindexsize(attlist);i++) { - NC_ATT_INFO_T* att = (NC_ATT_INFO_T*)ncindexith(attlist,i); - isdict = 0; - /* Create the attribute dict value*/ - if(att->nc_typeid == NC_CHAR - && ((char*)att->data)[0] == DICTOPEN - && ((char*)att->data)[att->len-1] == DICTCLOSE) { - /* this is subject to the JSON dictionary convention? */ - if(write_dict(att->len,att->data,&jdata)==NC_NOERR) isdict=1; - } - if(!isdict) { - if((stat = NCZ_stringconvert(att->nc_typeid,att->len,att->data,&jdata))) - goto done; - } - if((stat = NCJinsert(jattrs,att->hdr.name,jdata))) goto done; - jdata = NULL; - } - - if(jattrsp) {*jattrsp = jattrs; jattrs = NULL;} - -done: - NCJreclaim(akey); - NCJreclaim(jdata); - NCJreclaim(jattrs); - NCJreclaim(jdata); - return THROW(stat); -} - /** @internal Extract attributes from a group or var and return the corresponding NCjson dict. @@ -856,7 +835,7 @@ the corresponding NCjson dict. 
static int load_jatts(NCZMAP* map, NC_OBJ* container, int nczarrv1, NCjson** jattrsp, NClist** atypesp) { - int i,stat = NC_NOERR; + int stat = NC_NOERR; char* fullpath = NULL; char* key = NULL; NCjson* jnczarr = NULL; @@ -864,6 +843,8 @@ load_jatts(NCZMAP* map, NC_OBJ* container, int nczarrv1, NCjson** jattrsp, NClis NCjson* jncattr = NULL; NClist* atypes = NULL; /* envv list */ + ZTRACE(3,"map=%p container=%s nczarrv1=%d",map,container->name,nczarrv1); + /* alway return (possibly empty) list of types */ atypes = nclistnew(); @@ -883,7 +864,7 @@ load_jatts(NCZMAP* map, NC_OBJ* container, int nczarrv1, NCjson** jattrsp, NClis if((stat = nczm_concat(fullpath,ZATTRS,&key))) goto done; - /* Download the .zattrs object: may not exist */ + /* Download the .zattrs object: may not exist if not NCZarr V1 */ switch ((stat=NCZ_downloadjson(map,key,&jattrs))) { case NC_NOERR: break; case NC_EEMPTY: stat = NC_NOERR; break; /* did not exist */ @@ -903,8 +884,10 @@ load_jatts(NCZMAP* map, NC_OBJ* container, int nczarrv1, NCjson** jattrsp, NClis if((stat = nczm_concat(fullpath,NCZATTRDEP,&key))) goto done; stat=NCZ_downloadjson(map,key,&jncattr); } - } else {/* Get _NCZARR_ATTRS from .zattrs */ - stat = NCJdictget(jattrs,NCZ_V2_ATTR,&jncattr); + } else {/* Get _nczarr_attrs from .zattrs */ + stat = NCJdictget(jattrs,NCZ_V2_ATTR,&jncattr); + if(!stat && jncattr == NULL) + {stat = NCJdictget(jattrs,NCZ_V2_ATTR_UC,&jncattr);} } nullfree(key); key = NULL; switch (stat) { @@ -915,20 +898,13 @@ load_jatts(NCZMAP* map, NC_OBJ* container, int nczarrv1, NCjson** jattrsp, NClis if(jncattr != NULL) { NCjson* jtypes = NULL; /* jncattr attribute should be a dict */ - if(NCJsort(jncattr) != NCJ_DICT) {stat = THROW(NC_ENCZARR); goto done;} + if(NCJsort(jncattr) != NCJ_DICT) {stat = (THROW(NC_ENCZARR)); goto done;} /* Extract "types; may not exist if only hidden attributes are defined */ if((stat = NCJdictget(jncattr,"types",&jtypes))) goto done; if(jtypes != NULL) { - if(NCJsort(jtypes) != 
NCJ_DICT) {stat = THROW(NC_ENCZARR); goto done;} + if(NCJsort(jtypes) != NCJ_DICT) {stat = (THROW(NC_ENCZARR)); goto done;} /* Convert to an envv list */ - for(i=0;i<NCJlength(jtypes);i+=2) { - const NCjson* key = NCJith(jtypes,i); - const NCjson* value = NCJith(jtypes,i+1); - if(NCJsort(key) != NCJ_STRING) {stat = THROW(NC_ENCZARR); goto done;} - if(NCJsort(value) != NCJ_STRING) {stat = THROW(NC_ENCZARR); goto done;} - nclistpush(atypes,strdup(NCJstring(key))); - nclistpush(atypes,strdup(NCJstring(value))); - } + if((stat = jtypes2atypes(jtypes,atypes))) goto done; } } } @@ -944,52 +920,86 @@ done: } nullfree(fullpath); nullfree(key); - return THROW(stat); + return ZUNTRACE(THROW(stat)); +} + +/* Convert a JSON singleton or array of strings to a single string */ +static int +zcharify(NCjson* src, NCbytes* buf) +{ + int i, stat = NC_NOERR; + struct NCJconst jstr = NCJconst_empty; + + if(NCJsort(src) != NCJ_ARRAY) { /* singleton */ + if((stat = NCJcvt(src, NCJ_STRING, &jstr))) goto done; + ncbytescat(buf,jstr.sval); + } else for(i=0;i<NCJlength(src);i++) { + NCjson* value = NCJith(src,i); + if((stat = NCJcvt(value, NCJ_STRING, &jstr))) goto done; + ncbytescat(buf,jstr.sval); + nullfree(jstr.sval);jstr.sval = NULL; + } +done: + nullfree(jstr.sval); + return stat; } /* Convert a json value to actual data values of an attribute. 
*/ static int -zconvert(nc_type typeid, size_t typelen, NCjson* src, void* dst0) +zconvert(NCjson* src, nc_type typeid, size_t typelen, int* countp, NCbytes* dst) { int stat = NC_NOERR; int i; - size_t len; - unsigned char* dst = dst0; /* Work in char* space so we can do pointer arithmetic */ - + int count = 0; + + ZTRACE(3,"src=%s typeid=%d typelen=%u",NCJtotext(src),typeid,typelen); + + /* 3 cases: + (1) singleton atomic value + (2) array of atomic values + (3) other JSON expression + */ switch (NCJsort(src)) { + case NCJ_INT: case NCJ_DOUBLE: case NCJ_BOOLEAN: /* case 1 */ + count = 1; + if((stat = NCZ_convert1(src, typeid, dst))) + goto done; + break; + case NCJ_ARRAY: - for(i=0;i<NCJlength(src);i++) { - NCjson* value = NCJith(src,i); - assert(NCJsort(value) != NCJ_STRING); - if((stat = NCZ_convert1(value, typeid, dst))) - goto done; - dst += typelen; + if(typeid == NC_CHAR) { + if((stat = zcharify(src,dst))) goto done; + count = ncbyteslength(dst); + } else { + count = NCJlength(src); + for(i=0;i<count;i++) { + NCjson* value = NCJith(src,i); + if((stat = NCZ_convert1(value, typeid, dst))) goto done; + } } break; case NCJ_STRING: if(typeid == NC_CHAR) { - len = strlen(NCJstring(src)); - memcpy(dst,NCJstring(src),len); - dst[len] = '\0'; /* nul terminate */ - break; + if((stat = zcharify(src,dst))) goto done; + count = ncbyteslength(dst); + } else { + if((stat = NCZ_convert1(src, typeid, dst))) goto done; + count = 1; } - /* Fall thru */ - case NCJ_INT: case NCJ_DOUBLE: case NCJ_BOOLEAN: - if((stat = NCZ_convert1(src, typeid, dst))) - goto done; break; - default: stat = THROW(NC_ENCZARR); goto done; + default: stat = (THROW(NC_ENCZARR)); goto done; } + if(countp) *countp = count; done: - return THROW(stat); + return ZUNTRACE(THROW(stat)); } /* Extract type and data for an attribute */ static int -computeattrinfo(const char* name, NClist* atypes, NCjson* values, +computeattrinfo(const char* name, NClist* atypes, nc_type typehint, int purezarr, NCjson* values, 
nc_type* typeidp, size_t* typelenp, size_t* lenp, void** datap) { int stat = NC_NOERR; @@ -998,21 +1008,26 @@ computeattrinfo(const char* name, NClist* atypes, NCjson* values, void* data = NULL; nc_type typeid; + ZTRACE(3,"name=%s |atypes|=%u typehint=%d purezarr=%d values=|%s|",name,nclistlength(atypes),typehint,purezarr,NCJtotext(values)); + /* Get type info for the given att */ typeid = NC_NAT; for(i=0;i<nclistlength(atypes);i+=2) { const char* aname = nclistget(atypes,i); if(strcmp(aname,name)==0) { const char* atype = nclistget(atypes,i+1); - if((stat = ncz_dtype2typeinfo(atype,&typeid,NULL))) { - if((stat = ncz_nctype2typeinfo(atype,&typeid))) goto done; - } + if((stat = ncz_dtype2nctype(atype,typehint,purezarr,&typeid,NULL,NULL))) goto done; +// if((stat = ncz_nctypedecode(atype,&typeid))) goto done; break; } } - if(typeid >= NC_STRING) + if(typeid > NC_MAX_ATOMIC_TYPE) {stat = NC_EINTERNAL; goto done;} - if((stat = computeattrdata(&typeid, values, &typelen, &len, &data))) goto done; + /* Use the hint if given one */ + if(typeid == NC_NAT) + typeid = typehint; + + if((stat = computeattrdata(typehint, &typeid, values, &typelen, &len, &data))) goto done; if(typeidp) *typeidp = typeid; if(lenp) *lenp = len; @@ -1021,159 +1036,60 @@ computeattrinfo(const char* name, NClist* atypes, NCjson* values, done: nullfree(data); - return THROW(stat); + return ZUNTRACEX(THROW(stat),"typeid=%d typelen=%d len=%u",*typeidp,*typelenp,*lenp); } /* Extract data for an attribute */ static int -computeattrdata(nc_type* typeidp, NCjson* values, size_t* typelenp, size_t* lenp, void** datap) +computeattrdata(nc_type typehint, nc_type* typeidp, NCjson* values, size_t* typelenp, size_t* countp, void** datap) { int stat = NC_NOERR; - size_t count; - void* data = NULL; + NCbytes* buf = ncbytesnew(); size_t typelen; nc_type typeid = NC_NAT; NCjson* jtext = NULL; int reclaimvalues = 0; + int isjson = 0; /* 1 => attribute value is neither scalar nor array of scalars */ + int count = 0; /* 
no. of attribute values */ + + ZTRACE(3,"typehint=%d typeid=%d values=|%s|",typehint,*typeidp,NCJtotext(values)); /* Get assumed type */ if(typeidp) typeid = *typeidp; - if(typeid == NC_NAT) if((stat = inferattrtype(values,&typeid))) goto done; - if(typeid == NC_NAT) {stat = NC_EBADTYPE; goto done;} + if(typeid == NC_NAT && !isjson) { + if((stat = NCZ_inferattrtype(values,typehint, &typeid))) goto done; + } + + /* See if this is a simple vector (or scalar) of atomic types */ + isjson = NCZ_iscomplexjson(values,typeid); + + if(isjson) { + /* Apply the JSON attribute convention and convert to JSON string */ + typeid = NC_CHAR; + if((stat = json_convention_read(values,&jtext))) goto done; + values = jtext; jtext = NULL; + reclaimvalues = 1; + } if((stat = NC4_inq_atomic_type(typeid, NULL, &typelen))) goto done; - /* Collect the length of the attribute; might be a singleton */ - switch (NCJsort(values)) { - case NCJ_ARRAY: - count = NCJlength(values); - break; - case NCJ_DICT: - /* Apply the JSON dictionary convention and convert to string */ - if((stat = read_dict(values,&jtext))) goto done; - values = jtext; jtext = NULL; - reclaimvalues = 1; - /* fall thru */ - case NCJ_STRING: /* requires special handling as an array of characters; also look out for empty string */ - if(typeid == NC_CHAR) { - count = strlen(NCJstring(values)); - if(count == 0) count = 1; /* Actually a single nul char, probably default fill value ugh!*/ - } else - count = 1; - break; - default: - count = 1; /* singleton */ - break; - } + /* Convert the JSON attribute values to the actual netcdf attribute bytes */ + if((stat = zconvert(values,typeid,typelen,&count,buf))) goto done; - if(count > 0 && data == NULL) { - /* Allocate data space */ - if(typeid == NC_CHAR) - data = malloc(typelen*(count+1)); - else - data = malloc(typelen*count); - if(data == NULL) - {stat = NC_ENOMEM; goto done;} - /* convert to target type */ - if((stat = zconvert(typeid, typelen, values, data))) - goto done; - } - 
if(lenp) *lenp = count; if(typelenp) *typelenp = typelen; - if(datap) {*datap = data; data = NULL;} if(typeidp) *typeidp = typeid; /* return possibly inferred type */ + if(countp) *countp = count; + if(datap) *datap = ncbytesextract(buf); done: + ncbytesfree(buf); if(reclaimvalues) NCJreclaim(values); /* we created it */ - nullfree(data); - return THROW(stat); + return ZUNTRACEX(THROW(stat),"typelen=%d count=%u",(typelenp?*typelenp:0),(countp?*countp:-1)); } -static int -inferattrtype(NCjson* value, nc_type* typeidp) -{ - int stat = NC_NOERR; - nc_type typeid; - NCjson* j = NULL; - unsigned long long u64; - long long i64; - int negative = 0; - - if(NCJsort(value) == NCJ_ARRAY && NCJlength(value) == 0) - {typeid = NC_NAT; goto done;} - - if(NCJsort(value) == NCJ_NULL) - {typeid = NC_NAT; goto done;} - - if(value->sort == NCJ_ARRAY) { - j=NCJith(value,0); - return inferattrtype(j,typeidp); - } - - switch (NCJsort(value)) { - case NCJ_NULL: - typeid = NC_CHAR; - return NC_NOERR; - case NCJ_DICT: - typeid = NC_CHAR; - goto done; - case NCJ_UNDEF: - return NC_EINVAL; - default: /* atomic */ - break; - } - if(NCJstring(value) != NULL) - negative = (NCJstring(value)[0] == '-'); - switch (value->sort) { - case NCJ_INT: - if(negative) { - sscanf(NCJstring(value),"%lld",&i64); - u64 = (unsigned long long)i64; - } else - sscanf(NCJstring(value),"%llu",&u64); - typeid = mininttype(u64,negative); - break; - case NCJ_DOUBLE: - typeid = NC_DOUBLE; - break; - case NCJ_BOOLEAN: - typeid = NC_UBYTE; - break; - case NCJ_STRING: /* requires special handling as an array of characters */ - typeid = NC_CHAR; - break; - default: - stat = NC_ENCZARR; - } -done: - if(typeidp) *typeidp = typeid; - return stat; -} - -static int -mininttype(unsigned long long u64, int negative) -{ - long long i64 = (long long)u64; /* keep bit pattern */ - if(!negative && u64 >= NC_MAX_INT64) return NC_UINT64; - if(i64 < 0) { - if(i64 >= NC_MIN_BYTE) return NC_BYTE; - if(i64 >= NC_MIN_SHORT) return NC_SHORT; - 
if(i64 >= NC_MIN_INT) return NC_INT; - return NC_INT64; - } - if(i64 <= NC_MAX_BYTE) return NC_BYTE; - if(i64 <= NC_MAX_UBYTE) return NC_UBYTE; - if(i64 <= NC_MAX_SHORT) return NC_SHORT; - if(i64 <= NC_MAX_USHORT) return NC_USHORT; - if(i64 <= NC_MAX_INT) return NC_INT; - if(i64 <= NC_MAX_UINT) return NC_UINT; - return NC_INT64; -} - - /** * @internal Read file data from map to memory. * @@ -1189,7 +1105,8 @@ ncz_read_file(NC_FILE_INFO_T* file) NCjson* json = NULL; LOG((3, "%s: file: %s", __func__, file->controller->path)); - + ZTRACE(3,"file=%s",file->controller->path); + /* _nczarr should already have been read in ncz_open_dataset */ /* Now load the groups starting with root */ @@ -1198,7 +1115,7 @@ ncz_read_file(NC_FILE_INFO_T* file) done: NCJreclaim(json); - return THROW(stat); + return ZUNTRACE(THROW(stat)); } /** @@ -1228,7 +1145,8 @@ define_grp(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp) int v1 = 0; LOG((3, "%s: dims: %s", __func__, key)); - + ZTRACE(3,"file=%s grp=%s",file->controller->path,grp->hdr.name); + zinfo = file->format_file_info; map = zinfo->map; @@ -1255,9 +1173,10 @@ define_grp(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp) goto done; /* Read */ switch (stat=NCZ_downloadjson(map,key,&jgroup)) { - case NC_NOERR: /* we read it */ - /* Extract the NCZ_V2_GROUP dict */ + case NC_NOERR: /* Extract the NCZ_V2_GROUP dict */ if((stat = NCJdictget(jgroup,NCZ_V2_GROUP,&jdict))) goto done; + if(!stat && jdict == NULL) + {if((stat = NCJdictget(jgroup,NCZ_V2_GROUP_UC,&jdict))) goto done;} break; case NC_EEMPTY: /* does not exist, use search */ if((stat = parse_group_content_pure(zinfo,grp,varnames,subgrps))) @@ -1295,7 +1214,7 @@ done: nclistfreeall(subgrps); nullfree(fullpath); nullfree(key); - return THROW(stat); + return ZUNTRACE(THROW(stat)); } @@ -1316,6 +1235,9 @@ ncz_read_atts(NC_FILE_INFO_T* file, NC_OBJ* container) char* fullpath = NULL; char* key = NULL; NCZ_FILE_INFO_T* zinfo = NULL; + NC_VAR_INFO_T* var = NULL; + NCZ_VAR_INFO_T* zvar = NULL; + 
NC_GRP_INFO_T* grp = NULL; NCZMAP* map = NULL; NC_ATT_INFO_T* att = NULL; NCindex* attlist = NULL; @@ -1325,14 +1247,24 @@ ncz_read_atts(NC_FILE_INFO_T* file, NC_OBJ* container) size_t len, typelen; void* data = NULL; NC_ATT_INFO_T* fillvalueatt = NULL; + nc_type typehint = NC_NAT; + int purezarr; + + ZTRACE(3,"file=%s container=%s",file->controller->path,container->name); zinfo = file->format_file_info; map = zinfo->map; - if(container->sort == NCGRP) - attlist = ((NC_GRP_INFO_T*)container)->att; - else - attlist = ((NC_VAR_INFO_T*)container)->att; + purezarr = (zinfo->controls.flags & FLAG_PUREZARR)?1:0; + + if(container->sort == NCGRP) { + grp = ((NC_GRP_INFO_T*)container); + attlist = grp->att; + } else { + var = ((NC_VAR_INFO_T*)container); + zvar = (NCZ_VAR_INFO_T*)(var->format_var_info); + attlist = var->att; + } switch ((stat = load_jatts(map, container, (zinfo->controls.flags & FLAG_NCZARR_V1), &jattrs, &atypes))) { case NC_NOERR: break; @@ -1344,31 +1276,36 @@ ncz_read_atts(NC_FILE_INFO_T* file, NC_OBJ* container) if(jattrs != NULL) { /* Iterate over the attributes to create the in-memory attributes */ - /* Watch for special cases: _FillValue and _ARRAY_DIMENSIONS (xarray) */ + /* Watch for special cases: _FillValue and _ARRAY_DIMENSIONS (xarray), etc. 
*/ for(i=0;i<NCJlength(jattrs);i+=2) { NCjson* key = NCJith(jattrs,i); NCjson* value = NCJith(jattrs,i+1); const NC_reservedatt* ra = NULL; + int isfillvalue = 0; + int isdfaltmaxstrlen = 0; + int ismaxstrlen = 0; + const char* aname = NCJstring(key); + /* See if this is a notable attribute */ + if(var != NULL && strcmp(aname,NC_ATT_FILLVALUE)==0) isfillvalue = 1; + if(grp != NULL && grp->parent == NULL && strcmp(aname,NC_NCZARR_DEFAULT_MAXSTRLEN_ATTR)==0) + isdfaltmaxstrlen = 1; + if(var != NULL && strcmp(aname,NC_NCZARR_MAXSTRLEN_ATTR)==0) + ismaxstrlen = 1; /* See if this is reserved attribute */ - ra = NC_findreserved(NCJstring(key)); + ra = NC_findreserved(aname); if(ra != NULL) { - /* case 1: name = _NCProperties, grp=root, varid==NC_GLOBAL, flags & READONLYFLAG */ - if(strcmp(NCJstring(key),NCPROPS)==0 - && container->sort == NCGRP - && file->root_grp == (NC_GRP_INFO_T*)container) { + /* case 1: name = _NCProperties, grp=root, varid==NC_GLOBAL */ + if(strcmp(aname,NCPROPS)==0 && grp != NULL && file->root_grp == grp) { /* Setup provenance */ if(NCJsort(value) != NCJ_STRING) - {stat = THROW(NC_ENCZARR); goto done;} /*malformed*/ - if((stat = NCZ_read_provenance(file,NCJstring(key),NCJstring(value)))) + {stat = (THROW(NC_ENCZARR)); goto done;} /*malformed*/ + if((stat = NCZ_read_provenance(file,aname,NCJstring(value)))) goto done; } /* case 2: name = _ARRAY_DIMENSIONS, sort==NCVAR, flags & HIDDENATTRFLAG */ - if(strcmp(NCJstring(key),NC_XARRAY_DIMS)==0 - && container->sort == NCVAR - && (ra->flags & HIDDENATTRFLAG)) { - /* store for later */ - NCZ_VAR_INFO_T* zvar = (NCZ_VAR_INFO_T*)((NC_VAR_INFO_T*)container)->format_var_info; + if(strcmp(aname,NC_XARRAY_DIMS)==0 && var != NULL && (ra->flags & HIDDENATTRFLAG)) { + /* store for later */ int i; assert(NCJsort(value) == NCJ_ARRAY); if((zvar->xarray = nclistnew())==NULL) @@ -1379,19 +1316,28 @@ ncz_read_atts(NC_FILE_INFO_T* file, NC_OBJ* container) nclistpush(zvar->xarray,strdup(NCJstring(k))); } } - /* else 
ignore */ - continue; + /* case other: if attribute is hidden */ + if(ra->flags & HIDDENATTRFLAG) continue; /* ignore it */ } + typehint = NC_NAT; + if(isfillvalue) + typehint = var->type_info->hdr.id ; /* if unknown use the var's type for _FillValue */ /* Create the attribute */ /* Collect the attribute's type and value */ - if((stat = computeattrinfo(NCJstring(key),atypes,value, + if((stat = computeattrinfo(aname,atypes,typehint,purezarr,value, &typeid,&typelen,&len,&data))) goto done; - if((stat = ncz_makeattr(container,attlist,NCJstring(key),typeid,len,data,&att))) + if((stat = ncz_makeattr(container,attlist,aname,typeid,len,data,&att))) goto done; - nullfree(data); data = NULL; /* passed to the attribute */ - /* Is this _FillValue ? */ - if(strcmp(att->hdr.name,_FillValue)==0) fillvalueatt = att; + /* No longer need this copy of the data */ + if((stat = nc_reclaim_data_all(file->controller->ext_ncid,att->nc_typeid,data,len))) goto done; + data = NULL; + if(isfillvalue) + fillvalueatt = att; + if(ismaxstrlen && att->nc_typeid == NC_INT) + zvar->maxstrlen = ((int*)att->data)[0]; + if(isdfaltmaxstrlen && att->nc_typeid == NC_INT) + zinfo->default_maxstrlen = ((int*)att->data)[0]; } } /* If we have not read a _FillValue, then go ahead and create it */ @@ -1407,12 +1353,13 @@ ncz_read_atts(NC_FILE_INFO_T* file, NC_OBJ* container) ((NC_GRP_INFO_T*)container)->atts_read = 1; done: + if(data != NULL) + stat = nc_reclaim_data(file->controller->ext_ncid,att->nc_typeid,data,len); NCJreclaim(jattrs); nclistfreeall(atypes); nullfree(fullpath); - nullfree(data); nullfree(key); - return THROW(stat); + return ZUNTRACE(THROW(stat)); } /** @@ -1430,6 +1377,8 @@ define_dims(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* diminfo) { int i,stat = NC_NOERR; + ZTRACE(3,"file=%s grp=%s |diminfo|=%u",file->controller->path,grp->hdr.name,nclistlength(diminfo)); + /* Reify each dim in turn */ for(i = 0; i < nclistlength(diminfo); i+=2) { NC_DIM_INFO_T* dim = NULL; @@ -1449,7 +1398,7 
@@ define_dims(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* diminfo) } done: - return THROW(stat); + return ZUNTRACE(THROW(stat)); } /** @@ -1480,15 +1429,19 @@ define_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames) int purezarr = 0; int xarray = 0; int formatv1 = 0; - nc_type typeid; + nc_type vtype; + int vtypelen; size64_t* shapes = NULL; int rank = 0; + int zarr_rank = 1; /* Need to watch out for scalars */ NClist* dimnames = nclistnew(); #ifdef ENABLE_NCZARR_FILTERS NCjson* jfilter = NULL; int chainindex; #endif + ZTRACE(3,"file=%s grp=%s |varnames|=%u",file->controller->path,grp->hdr.name,nclistlength(varnames)); + zinfo = file->format_file_info; map = zinfo->map; @@ -1500,8 +1453,6 @@ define_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames) for(i = 0; i < nclistlength(varnames); i++) { NC_VAR_INFO_T* var; const char* varname = nclistget(varnames,i); - - /* Create the NC_VAR_INFO_T object */ if((stat = nc4_var_list_add2(grp, varname, &var))) goto done; @@ -1542,41 +1493,110 @@ define_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames) if((stat = NCJdictget(jvar,"zarr_format",&jvalue))) goto done; sscanf(NCJstring(jvalue),"%d",&version); if(version != zinfo->zarr.zarr_version) - {stat = THROW(NC_ENCZARR); goto done;} + {stat = (THROW(NC_ENCZARR)); goto done;} } /* Set the type and endianness of the variable */ { - nc_type vtype; int endianness; if((stat = NCJdictget(jvar,"dtype",&jvalue))) goto done; /* Convert dtype to nc_type + endianness */ - if((stat = ncz_dtype2typeinfo(NCJstring(jvalue),&vtype,&endianness))) + if((stat = ncz_dtype2nctype(NCJstring(jvalue),NC_NAT,purezarr,&vtype,&endianness,&vtypelen))) goto done; - if(vtype > NC_NAT && vtype < NC_STRING) { + if(vtype > NC_NAT && vtype <= NC_MAX_ATOMIC_TYPE) { /* Locate the NC_TYPE_INFO_T object */ if((stat = ncz_gettype(file,grp,vtype,&var->type_info))) goto done; } else {stat = NC_EBADTYPE; goto done;} if(endianness == NC_ENDIAN_NATIVE) endianness = 
zinfo->native_endianness; + if(endianness == NC_ENDIAN_NATIVE) + endianness = (NCZ_isLittleEndian()?NC_ENDIAN_LITTLE:NC_ENDIAN_BIG); if(endianness == NC_ENDIAN_LITTLE || endianness == NC_ENDIAN_BIG) { var->endianness = endianness; } else {stat = NC_EBADTYPE; goto done;} var->type_info->endianness = var->endianness; /* Propagate */ + if(vtype == NC_STRING) { + zvar->maxstrlen = vtypelen; + vtypelen = sizeof(char*); /* in-memory len */ + if(zvar->maxstrlen <= 0) zvar->maxstrlen = NCZ_get_maxstrlen((NC_OBJ*)var); + } } + + if(!purezarr) { + /* Extract the _NCZARR_ARRAY values */ + /* Do this first so we know about storage esp. scalar */ + if(formatv1) { + /* Construct the path to the zarray object */ + if((stat = nczm_concat(varpath,NCZARRAY,&key))) + goto done; + /* Download the nczarray object */ + if((stat=NCZ_readdict(map,key,&jncvar))) + goto done; + nullfree(key); key = NULL; + } else {/* format v2 */ + /* Extract the NCZ_V2_ARRAY dict */ + if((stat = NCJdictget(jvar,NCZ_V2_ARRAY,&jncvar))) goto done; + if(!stat && jncvar == NULL) + {if((stat = NCJdictget(jvar,NCZ_V2_ARRAY_UC,&jncvar))) goto done;} + } + if(jncvar == NULL) {stat = NC_ENCZARR; goto done;} + assert((NCJsort(jncvar) == NCJ_DICT)); + /* Extract storage flag */ + if((stat = NCJdictget(jncvar,"storage",&jvalue))) + goto done; + if(jvalue != NULL) { + if(strcmp(NCJstring(jvalue),"chunked") == 0) { + var->storage = NC_CHUNKED; + } else if(strcmp(NCJstring(jvalue),"compact") == 0) { + var->storage = NC_COMPACT; + } else if(strcmp(NCJstring(jvalue),"scalar") == 0) { + var->storage = NC_CONTIGUOUS; + zvar->scalar = 1; + } else { /*storage = NC_CONTIGUOUS;*/ + var->storage = NC_CONTIGUOUS; + } + } + /* Extract dimrefs list */ + switch ((stat = NCJdictget(jncvar,"dimrefs",&jdimrefs))) { + case NC_NOERR: /* Extract the dimref names */ + assert((NCJsort(jdimrefs) == NCJ_ARRAY)); + if(zvar->scalar) { + assert(NCJlength(jdimrefs) == 0); + } else { + rank = NCJlength(jdimrefs); + for(j=0;j<rank;j++) { + const 
NCjson* dimpath = NCJith(jdimrefs,j); + assert(NCJsort(dimpath) == NCJ_STRING); + nclistpush(dimnames,strdup(NCJstring(dimpath))); + } + } + jdimrefs = NULL; /* avoid double free */ + break; + case NC_EEMPTY: /* will simulate it from the shape of the variable */ + stat = NC_NOERR; + break; + default: goto done; + } + jdimrefs = NULL; + } + /* shape */ { if((stat = NCJdictget(jvar,"shape",&jvalue))) goto done; - if(NCJsort(jvalue) != NCJ_ARRAY) {stat = THROW(NC_ENCZARR); goto done;} - /* Verify the rank */ - if(zvar->scalar) rank = 0; else rank = NCJlength(jvalue); - /* Set the rank of the variable */ + if(NCJsort(jvalue) != NCJ_ARRAY) {stat = (THROW(NC_ENCZARR)); goto done;} + if(zvar->scalar) { + rank = 0; + zarr_rank = 1; /* Zarr does not support scalars */ + } else + rank = (zarr_rank = NCJlength(jvalue)); + /* Save the rank of the variable */ if((stat = nc4_var_set_ndims(var, rank))) goto done; /* extract the shapes */ - if((shapes = (size64_t*)malloc(sizeof(size64_t)*rank)) == NULL) - {stat = THROW(NC_ENOMEM); goto done;} + if((shapes = (size64_t*)malloc(sizeof(size64_t)*zarr_rank)) == NULL) + {stat = (THROW(NC_ENOMEM)); goto done;} if((stat = decodeints(jvalue, shapes))) goto done; } + /* Capture dimension_separator (must precede chunk cache creation) */ { NCglobalstate* ngs = NC_getglobalstate(); @@ -1593,6 +1613,7 @@ define_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames) zvar->dimension_separator = ngs->zarr.dimension_separator; /* use global value */ assert(islegaldimsep(zvar->dimension_separator)); /* we are hosed */ } + /* fill_value; must precede calls to adjust cache */ { if((stat = NCJdictget(jvar,"fill_value",&jvalue))) goto done; @@ -1600,37 +1621,38 @@ define_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames) var->no_fill = 1; else { size_t fvlen; - typeid = var->type_info->hdr.id; + nc_type atypeid = vtype; var->no_fill = 0; - if((stat = computeattrdata(&typeid, jvalue, NULL, &fvlen, &var->fill_value))) + if((stat = 
computeattrdata(var->type_info->hdr.id, &atypeid, jvalue, NULL, &fvlen, &var->fill_value))) goto done; - assert(typeid == var->type_info->hdr.id); + assert(atypeid == vtype); /* Note that we do not create the _FillValue attribute here to avoid having to read all the attributes and thus foiling lazy read.*/ } } + /* chunks */ { - int rank; size64_t chunks[NC_MAX_VAR_DIMS]; if((stat = NCJdictget(jvar,"chunks",&jvalue))) goto done; if(jvalue != NULL && NCJsort(jvalue) != NCJ_ARRAY) - {stat = THROW(NC_ENCZARR); goto done;} + {stat = (THROW(NC_ENCZARR)); goto done;} /* Verify the rank */ - rank = NCJlength(jvalue); - if(rank > 0) { + assert (zarr_rank == NCJlength(jvalue)); + if(!zvar->scalar) { + if(zarr_rank == 0) {stat = NC_ENCZARR; goto done;} var->storage = NC_CHUNKED; - if(var->ndims+zvar->scalar != rank) - {stat = THROW(NC_ENCZARR); goto done;} - if((var->chunksizes = malloc(sizeof(size_t)*rank)) == NULL) + if(var->ndims != rank) + {stat = (THROW(NC_ENCZARR)); goto done;} + if((var->chunksizes = malloc(sizeof(size_t)*zarr_rank)) == NULL) {stat = NC_ENOMEM; goto done;} if((stat = decodeints(jvalue, chunks))) goto done; /* validate the chunk sizes */ zvar->chunkproduct = 1; for(j=0;j<rank;j++) { if(chunks[j] == 0 || chunks[j] > shapes[j]) - {stat = THROW(NC_ENCZARR); goto done;} + {stat = (THROW(NC_ENCZARR)); goto done;} var->chunksizes[j] = (size_t)chunks[j]; zvar->chunkproduct *= chunks[j]; } @@ -1690,62 +1712,14 @@ define_vars(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* varnames) #endif } - if(!purezarr) { - if(formatv1) { - /* Construct the path to the zarray object */ - if((stat = nczm_concat(varpath,NCZARRAY,&key))) - goto done; - /* Download the nczarray object */ - if((stat=NCZ_readdict(map,key,&jncvar))) - goto done; - nullfree(key); key = NULL; - } else {/* format v2 */ - /* Extract the NCZ_V2_ARRAY dict */ - if((stat = NCJdictget(jvar,NCZ_V2_ARRAY,&jncvar))) goto done; - } - if(jncvar == NULL) {stat = NC_ENCZARR; goto done;} - 
assert((NCJsort(jncvar) == NCJ_DICT)); - /* Extract storage flag */ - if((stat = NCJdictget(jncvar,"storage",&jvalue))) - goto done; - if(jvalue != NULL) { - if(strcmp(NCJstring(jvalue),"chunked") == 0) { - var->storage = NC_CHUNKED; - } else if(strcmp(NCJstring(jvalue),"compact") == 0) { - var->storage = NC_COMPACT; - } else if(strcmp(NCJstring(jvalue),"scalar") == 0) { - var->storage = NC_CONTIGUOUS; - zvar->scalar = 1; - } else { /*storage = NC_CONTIGUOUS;*/ - var->storage = NC_CONTIGUOUS; - } - } - /* Extract dimnames list */ - switch ((stat = NCJdictget(jncvar,"dimrefs",&jdimrefs))) { - case NC_NOERR: /* Extract the dimref names */ - assert((NCJsort(jdimrefs) == NCJ_ARRAY)); - assert(NCJlength(jdimrefs) == rank); - for(j=0;j<rank;j++) { - const NCjson* dimpath = NCJith(jdimrefs,j); - assert(NCJsort(dimpath) == NCJ_STRING); - nclistpush(dimnames,strdup(NCJstring(dimpath))); - } - jdimrefs = NULL; /* avoid double free */ - break; - case NC_EEMPTY: /* will simulate it from the shape of the variable */ - stat = NC_NOERR; - break; - default: goto done; - } - jdimrefs = NULL; - } - if((stat = computedimrefs(file, var, purezarr, xarray, rank, dimnames, shapes, var->dim))) goto done; - /* Extract the dimids */ - for(j=0;j<rank;j++) - var->dimids[j] = var->dim[j]->hdr.id; + if(!zvar->scalar) { + /* Extract the dimids */ + for(j=0;j<rank;j++) + var->dimids[j] = var->dim[j]->hdr.id; + } #ifdef ENABLE_NCZARR_FILTERS /* At this point, we can finalize the filters */ @@ -1766,7 +1740,7 @@ done: nclistfreeall(dimnames); NCJreclaim(jvar); if(formatv1) NCJreclaim(jncvar); - return THROW(stat); + return ZUNTRACE(THROW(stat)); } /** @@ -1784,6 +1758,8 @@ define_subgrps(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* subgrpnames) { int i,stat = NC_NOERR; + ZTRACE(3,"file=%s grp=%s |subgrpnames|=%u",file->controller->path,grp->hdr.name,nclistlength(subgrpnames)); + /* Load each subgroup name in turn */ for(i = 0; i < nclistlength(subgrpnames); i++) { NC_GRP_INFO_T* g = NULL; @@ 
-1807,7 +1783,7 @@ define_subgrps(NC_FILE_INFO_T* file, NC_GRP_INFO_T* grp, NClist* subgrpnames) } done: - return THROW(stat); + return ZUNTRACE(THROW(stat)); } int @@ -1822,6 +1798,8 @@ ncz_read_superblock(NC_FILE_INFO_T* file, char** nczarrvp, char** zarrfp) char* zarr_format = NULL; NCZ_FILE_INFO_T* zinfo = (NCZ_FILE_INFO_T*)file->format_file_info; + ZTRACE(3,"file=%s",file->controller->path); + /* See if the V1 META-Root is being used */ switch(stat = NCZ_downloadjson(zinfo->map, NCZMETAROOT, &jnczgroup)) { case NC_EEMPTY: /* not there */ @@ -1846,7 +1824,9 @@ ncz_read_superblock(NC_FILE_INFO_T* file, char** nczarrvp, char** zarrfp) if(jzgroup != NULL) { /* See if this NCZarr V2 */ if((stat = NCJdictget(jzgroup,NCZ_V2_SUPERBLOCK,&jsuper))) goto done; - if(jsuper != NULL) { + if(!stat && jsuper == NULL) + {if((stat = NCJdictget(jzgroup,NCZ_V2_SUPERBLOCK_UC,&jsuper))) goto done;} + if(jsuper != NULL) { /* Extract the equivalent attribute */ if(jsuper->sort != NCJ_DICT) {stat = NC_ENCZARR; goto done;} @@ -1865,7 +1845,7 @@ ncz_read_superblock(NC_FILE_INFO_T* file, char** nczarrvp, char** zarrfp) /* Also means file is read only */ file->no_write = 1; } else if(jsuper != NULL) { - /* ! FLAG_NCZARR_V1 && ! FLAG_PUREZARR */ + /* ! FLAG_NCZARR_V1 && ! 
FLAG_PUREZARR */ } if(nczarrvp) {*nczarrvp = nczarr_version; nczarr_version = NULL;} if(zarrfp) {*zarrfp = zarr_format; zarr_format = NULL;} @@ -1874,7 +1854,7 @@ done: nullfree(nczarr_version); NCJreclaim(jzgroup); NCJreclaim(jnczgroup); - return THROW(stat); + return ZUNTRACE(THROW(stat)); } /**************************************************/ @@ -1886,9 +1866,11 @@ parse_group_content(NCjson* jcontent, NClist* dimdefs, NClist* varnames, NClist* int i,stat = NC_NOERR; NCjson* jvalue = NULL; + ZTRACE(3,"jcontent=|%s| |dimdefs|=%u |varnames|=%u |subgrps|=%u",NCJtotext(jcontent),(unsigned)nclistlength(dimdefs),(unsigned)nclistlength(varnames),(unsigned)nclistlength(subgrps)); + if((stat=NCJdictget(jcontent,"dims",&jvalue))) goto done; if(jvalue != NULL) { - if(NCJsort(jvalue) != NCJ_DICT) {stat = THROW(NC_ENCZARR); goto done;} + if(NCJsort(jvalue) != NCJ_DICT) {stat = (THROW(NC_ENCZARR)); goto done;} /* Extract the dimensions defined in this group */ for(i=0;i<NCJlength(jvalue);i+=2) { NCjson* jname = NCJith(jvalue,i); @@ -1934,7 +1916,7 @@ parse_group_content(NCjson* jcontent, NClist* dimdefs, NClist* varnames, NClist* } done: - return THROW(stat); + return ZUNTRACE(THROW(stat)); } static int @@ -1942,13 +1924,15 @@ parse_group_content_pure(NCZ_FILE_INFO_T* zinfo, NC_GRP_INFO_T* grp, NClist* va { int stat = NC_NOERR; + ZTRACE(3,"zinfo=%s grp=%s |varnames|=%u |subgrps|=%u",zinfo->common.file->controller->path,grp->hdr.name,(unsigned)nclistlength(varnames),(unsigned)nclistlength(subgrps)); + nclistclear(varnames); if((stat = searchvars(zinfo,grp,varnames))) goto done; nclistclear(subgrps); if((stat = searchsubgrps(zinfo,grp,subgrps))) goto done; done: - return THROW(stat); + return ZUNTRACE(THROW(stat)); } @@ -1981,7 +1965,7 @@ done: NCJreclaim(jvalue); nullfree(varkey); varkey = NULL; nullfree(zakey); zakey = NULL; - return THROW(stat); + return ZUNTRACE(THROW(stat)); } #endif @@ -2053,18 +2037,27 @@ done: return stat; } -/* Convert a list of integer strings to 64 
bit integers */ +/* Convert a list of integer strings to 64 bit dimension sizes (shapes) */ static int decodeints(NCjson* jshape, size64_t* shapes) { int i, stat = NC_NOERR; for(i=0;i<NCJlength(jshape);i++) { - long long v; + struct ZCVT zcvt; + nc_type typeid = NC_NAT; NCjson* jv = NCJith(jshape,i); - if((stat = NCZ_convert1(jv,NC_INT64,(unsigned char*)&v))) goto done; - if(v < 0) {stat = THROW(NC_ENCZARR); goto done;} - shapes[i] = (size64_t)v; + if((stat = NCZ_json2cvt(jv,&zcvt,&typeid))) goto done; + switch (typeid) { + case NC_INT64: + if(zcvt.int64v < 0) {stat = (THROW(NC_ENCZARR)); goto done;} + shapes[i] = (size64_t)zcvt.int64v; + break; + case NC_UINT64: + shapes[i] = (size64_t)zcvt.uint64v; + break; + default: {stat = (THROW(NC_ENCZARR)); goto done;} + } } done: @@ -2186,7 +2179,8 @@ ncz_get_var_meta(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var) assert(file && var && var->format_var_info); LOG((3, "%s: var %s", __func__, var->hdr.name)); - + ZTRACE(3,"file=%s var=%s",file->controller->path,var->hdr.name); + /* Have we already read the var metadata? */ if (var->meta_read) return NC_NOERR; @@ -2291,6 +2285,8 @@ computedimrefs(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, int purezarr, int xarra NCZ_VAR_INFO_T* zvar = (NCZ_VAR_INFO_T*)(var->format_var_info); NCjson* jatts = NULL; + ZTRACE(3,"file=%s var=%s purezarr=%d xarray=%d ndims=%d shape=%s", + file->controller->path,var->hdr.name,purezarr,xarray,(int)ndims,nczprint_vector(ndims,shapes)); assert(zfile && zvar); if(purezarr && xarray) {/* Read in the attributes to get xarray dimdef attribute; Note that it might not exist */ @@ -2327,29 +2323,23 @@ computedimrefs(NC_FILE_INFO_T* file, NC_VAR_INFO_T* var, int purezarr, int xarra done: NCJreclaim(jatts); - return THROW(stat); + return ZUNTRACE(THROW(stat)); } /** -Implement the JSON convention for dictionaries. - -Reading: If the value of the attribute is a dictionary, then stringify - it as the value and make the attribute be of type "char". 
- -Writing: if the attribute is of type char and looks like a JSON dictionary, - then parse it as JSON and use that as its value in .zattrs. +Implement the JSON convention: +Stringify it as the value and make the attribute be of type "char". */ static int -read_dict(NCjson* jdict, NCjson** jtextp) +json_convention_read(NCjson* json, NCjson** jtextp) { int stat = NC_NOERR; NCjson* jtext = NULL; char* text = NULL; - if(jdict == NULL) {stat = NC_EINVAL; goto done;} - if(NCJsort(jdict) != NCJ_DICT) {stat = NC_EINVAL; goto done;} - if(NCJunparse(jdict,0,&text)) {stat = NC_EINVAL; goto done;} + if(json == NULL) {stat = NC_EINVAL; goto done;} + if(NCJunparse(json,0,&text)) {stat = NC_EINVAL; goto done;} if(NCJnewstring(NCJ_STRING,text,&jtext)) {stat = NC_EINVAL; goto done;} *jtextp = jtext; jtext = NULL; done: @@ -2358,20 +2348,45 @@ done: return stat; } +#if 0 +/** +Implement the JSON convention: +Parse it as JSON and use that as its value in .zattrs. +*/ static int -write_dict(size_t len, const void* data, NCjson** jsonp) +json_convention_write(size_t len, const void* data, NCjson** jsonp, int* isjsonp) { int stat = NC_NOERR; - NCjson* jdict = NULL; + NCjson* jexpr = NULL; + int isjson = 0; assert(jsonp != NULL); - if(NCJparsen(len,(char*)data,0,&jdict)) - {stat = NC_EINVAL; goto done;} - if(NCJsort(jdict) != NCJ_DICT) - {stat = NC_EINVAL; goto done;} - *jsonp = jdict; jdict = NULL; + if(NCJparsen(len,(char*)data,0,&jexpr)) { + /* Ok, just treat as sequence of chars */ + if((stat = NCJnewstringn(NCJ_STRING, len, data, &jexpr))) goto done; + } + isjson = 1; + *jsonp = jexpr; jexpr = NULL; + if(isjsonp) *isjsonp = isjson; done: - NCJreclaim(jdict); + NCJreclaim(jexpr); return stat; } +#endif +/* Convert an attribute "types list to an envv style list */ +static int +jtypes2atypes(NCjson* jtypes, NClist* atypes) +{ + int i, stat = NC_NOERR; + for(i=0;i<NCJlength(jtypes);i+=2) { + const NCjson* key = NCJith(jtypes,i); + const NCjson* value = NCJith(jtypes,i+1); + 
if(NCJsort(key) != NCJ_STRING) {stat = (THROW(NC_ENCZARR)); goto done;} + if(NCJsort(value) != NCJ_STRING) {stat = (THROW(NC_ENCZARR)); goto done;} + nclistpush(atypes,strdup(NCJstring(key))); + nclistpush(atypes,strdup(NCJstring(value))); + } +done: + return stat; +} diff --git a/libnczarr/ztype.c b/libnczarr/ztype.c index 12035dd0d..0f3d6e301 100644 --- a/libnczarr/ztype.c +++ b/libnczarr/ztype.c @@ -147,7 +147,7 @@ NCZ_inq_typeids(int ncid, int *ntypes, int *typeids) * @param size Size in bytes of new type. * @param name Name of new type. * @param base_typeid Base type ID. - * @param type_class NC_VLEN, NC_ENUM, or NC_STRING + * @param type_class NC_VLEN, NC_ENUM * @param typeidp Pointer that gets new type ID. * * @return ::NC_NOERR No error. diff --git a/libnczarr/zutil.c b/libnczarr/zutil.c index fb04dd38a..a6b115c11 100644 --- a/libnczarr/zutil.c +++ b/libnczarr/zutil.c @@ -17,20 +17,57 @@ /**************************************************/ /* Static zarr type name table */ -static const char* znames_little[NUM_ATOMIC_TYPES] = { -NULL, /*NC_NAT*/ -"<i1", /*NC_BYTE*/ "<U1", /*NC_CHAR*/ "<i2", /*NC_SHORT*/ -"<i4", /*NC_INT*/ "<f4", /*NC_FLOAT*/ "<f8", /*NC_DOUBLE*/ "<u1", /*NC_UBYTE*/ -"<u2", /*NC_USHORT*/ "<u4", /*NC_UINT*/ "<i8", /*NC_INT64*/ "<u8", /*NC_UINT64*/ -NULL, /*NC_STRING*/ -}; +/* Table of nc_type X {Zarr,NCZarr} X endianness +Issue: Need to distinquish NC_STRING && MAXSTRLEN==1 from NC_CHAR +in a way that allows other Zarr implementations to read the data. -static const char* znames_big[NUM_ATOMIC_TYPES] = { -NULL, /*NC_NAT*/ -">i1", /*NC_BYTE*/ ">U1", /*NC_CHAR*/ ">i2", /*NC_SHORT*/ ">i4", /*NC_INT*/ -">f4", /*NC_FLOAT*/ ">f8", /*NC_DOUBLE*/ ">u1", /*NC_UBYTE*/ -">u2", /*NC_USHORT*/ ">u4", /*NC_UINT*/ ">i8", /*NC_INT64*/ ">u8", /*NC_UINT64*/ -NULL, /*NC_STRING*/ +Available info: +Write: we have the netcdf type, so there is no ambiguity. +Read: we have the variable type and also any attribute dtype, +but those types are ambiguous. 
+We also have the attribute vs variable type problem. +For pure zarr, we have to infer the type of an attribute, +so if we have "var:strattr = \"abcdef\"", then we need +to decide how to infer the type: NC_STRING vs NC_CHAR. + +Solution: +For variables and for NCZarr type attributes, distinquish by using: +* "|S1" for NC_CHAR. +* ">S1" for NC_STRING && MAXSTRLEN==1 +It is a bit of a hack to use endianness, but it should be ok since for +string/char, the endianness has no meaning. +Note that we could use "|U1", but since this is utf-16 or utf-32 +in python, it may cause problems when reading what amounts to utf-8. + +For attributes, we infer: +* NC_CHAR if the hint is 0 + - e.g. var:strattr = 'abcdef'" => NC_CHAR +* NC_STRING if hint is NC_STRING. + - e.g. string var:strattr = \"abc\", \"def\"" => NC_STRING + +Note also that if we read a pure zarr file we will probably always +see "|S1", so we will never see a variable of type NC_STRING with length == 1. +We might however see an attribute of type string. 
+*/ +static const struct ZTYPES { + char* zarr[3]; + char* nczarr[3]; +} znames[NUM_ATOMIC_TYPES] = { +/* nc_type Pure Zarr NCZarr + NE LE BE NE LE BE*/ +/*NC_NAT*/ {{NULL,NULL,NULL}, {NULL,NULL,NULL}}, +/*NC_BYTE*/ {{"|i1","<i1",">i1"},{"|i1","<i1",">i1"}}, +/*NC_CHAR*/ {{"|S1","|S1","|S1"},{"|S1","|S1","|S1"}}, +/*NC_SHORT*/ {{"|i2","<i2",">i2"},{"|i2","<i2",">i2"}}, +/*NC_INT*/ {{"|i4","<i4",">i4"},{"|i4","<i4",">i4"}}, +/*NC_FLOAT*/ {{"|f4","<f4",">f4"},{"|f4","<f4",">f4"}}, +/*NC_DOUBLE*/ {{"|f8","<f8",">f8"},{"|f8","<f8",">f8"}}, +/*NC_UBYTE*/ {{"|u1","<u1",">u1"},{"|u1","<u1",">u1"}}, +/*NC_USHORT*/ {{"|u2","<u2",">u2"},{"|u2","<u2",">u2"}}, +/*NC_UINT*/ {{"|u4","<u4",">u4"},{"|u4","<u4",">u4"}}, +/*NC_INT64*/ {{"|i8","<i8",">i8"},{"|i8","<i8",">i8"}}, +/*NC_UINT64*/ {{"|u8","<u8",">u8"},{"|u8","<u8",">u8"}}, +/*NC_STRING*/ {{">S%d",">S%d",">S%d"},{">S%d",">S%d",">S%d"}}, }; #if 0 @@ -47,6 +84,7 @@ NULL, /*NC_NAT*/ "4294967295", /*NC_UINT*/ "-9223372036854775806", /*NC_INT64*/ "18446744073709551614", /*NC_UINT64*/ +"", /*NC_STRING*/ }; #endif @@ -64,6 +102,7 @@ NCJ_INT, /*NC_USHORT*/ NCJ_INT, /*NC_UINT*/ NCJ_INT, /*NC_INT64*/ NCJ_INT, /*NC_UINT64*/ +NCJ_STRING, /*NC_STRING*/ }; /* Forward */ @@ -204,8 +243,8 @@ NCZ_downloadjson(NCZMAP* zmap, const char* key, NCjson** jsonp) goto done; content[len] = '\0'; - if((stat = NCJparse(content,0,&json))) - goto done; + if((stat = NCJparse(content,0,&json)) < 0) + {stat = NC_ENCZARR; goto done;} if(jsonp) {*jsonp = json; json = NULL;} @@ -370,28 +409,6 @@ done: return stat; } -/** -@internal Given an nc_type+other, produce the corresponding -zarr type name. 
-@param nctype - [in] nc_type -@param little - [in] 1=>little, 0 => big -@param namep - [out] pointer to hold pointer to the name -@return NC_NOERR -@author Dennis Heimbigner -*/ - -int -ncz_zarr_type_name(nc_type nctype, int little, const char** znamep) -{ - if(nctype <= 0 || nctype >= NC_STRING) return NC_EINVAL; - if(little) { - if(znamep) *znamep = znames_little[nctype]; - } else { - if(znamep) *znamep = znames_big[nctype]; - } - return NC_NOERR; -} - #if 0 /** @internal Given an nc_type, produce the corresponding @@ -405,7 +422,7 @@ default fill value as a string. int ncz_default_fill_value(nc_type nctype, const char** dfaltp) { - if(nctype <= 0 || nctype >= NC_STRING) return NC_EINVAL; + if(nctype <= 0 || nctype > NC_MAX_ATOMIC_TYPE) return NC_EINVAL; if(dfaltp) *dfaltp = zfillvalue[nctype]; return NC_NOERR; } @@ -423,7 +440,7 @@ fill value JSON type int ncz_fill_value_sort(nc_type nctype, int* sortp) { - if(nctype <= 0 || nctype >= NC_STRING) return NC_EINVAL; + if(nctype <= 0 || nctype > NC_MAX_ATOMIC_TYPE) return NC_EINVAL; if(sortp) *sortp = zjsonsort[nctype]; return NC_NOERR; } @@ -486,57 +503,123 @@ done: return stat; } +#if 0 +/* Convert a netcdf-4 type integer */ int -ncz_nctype2typeinfo(const char* snctype, nc_type* nctypep) +ncz_nctypedecode(const char* snctype, nc_type* nctypep) { unsigned nctype = 0; if(sscanf(snctype,"%u",&nctype)!=1) return NC_EINVAL; if(nctypep) *nctypep = nctype; return NC_NOERR; } +#endif + +/** +@internal Given an nc_type+other, produce the corresponding dtype string. 
+@param nctype - [in] nc_type +@param endianness - [in] endianness +@param purezarr - [in] 1=>pure zarr, 0 => nczarr +@param strlen - [in] max string length +@param namep - [out] pointer to hold pointer to the dtype; user frees +@return NC_NOERR +@return NC_EINVAL +@author Dennis Heimbigner +*/ int -ncz_dtype2typeinfo(const char* dtype, nc_type* nctypep, int* endianp) +ncz_nctype2dtype(nc_type nctype, int endianness, int purezarr, int strlen, char** dnamep) +{ + char dname[64]; + char* format = NULL; + + if(nctype <= NC_NAT || nctype > NC_MAX_ATOMIC_TYPE) return NC_EINVAL; + if(purezarr) + format = znames[nctype].zarr[endianness]; + else + format = znames[nctype].nczarr[endianness]; + snprintf(dname,sizeof(dname),format,strlen); + if(dnamep) *dnamep = strdup(dname); + return NC_NOERR; +} + +/* +@internal Convert a numcodecs dtype spec to a corresponding nc_type. +@param nctype - [in] dtype the dtype to convert +@param nctype - [in] typehint help disambiguate char vs string +@param purezarr - [in] 1=>pure zarr, 0 => nczarr +@param nctypep - [out] hold corresponding type +@param endianp - [out] hold corresponding endianness +@param typelenp - [out] hold corresponding type size (for fixed length strings) +@return NC_NOERR +@return NC_EINVAL +@author Dennis Heimbigner +*/ + +int +ncz_dtype2nctype(const char* dtype, nc_type typehint, int purezarr, nc_type* nctypep, int* endianp, int* typelenp) { int stat = NC_NOERR; int typelen = 0; int count; char tchar; nc_type nctype = NC_NAT; - int endianness = 0; + int endianness = -1; + const char* p; + int n; - if(endianness) *endianp = NC_ENDIAN_NATIVE; + if(endianp) *endianp = NC_ENDIAN_NATIVE; if(nctypep) *nctypep = NC_NAT; if(dtype == NULL) goto zerr; - if(strlen(dtype) < 3) goto zerr; - switch (dtype[0]) { + p = dtype; + switch (*p++) { case '<': endianness = NC_ENDIAN_LITTLE; break; case '>': endianness = NC_ENDIAN_BIG; break; + case '=': endianness = NC_ENDIAN_NATIVE; break; case '|': endianness = NC_ENDIAN_NATIVE; break; 
- default: goto zerr; + default: p--; endianness = NC_ENDIAN_NATIVE; break; } + tchar = *p++; /* get the base type */ /* Decode the type length */ - count = sscanf(dtype+2,"%d",&typelen); - if(count != 1) goto zerr; - tchar = dtype[1]; - switch(typelen) { - case 1: - switch (tchar) { - case 'i': nctype = NC_BYTE; break; - case 'u': nctype = NC_UBYTE; break; - case 'U': nctype = NC_CHAR; break; - default: goto zerr; + count = sscanf(p,"%d%n",&typelen,&n); + if(count == 0) goto zerr; + p += n; + + /* Short circuit fixed length strings */ + if(tchar == 'S') { + /* Fixed length string */ + switch (typehint) { + case NC_CHAR: nctype = NC_CHAR; typelen = 1; break; + case NC_STRING: nctype = NC_STRING; break; + default: + if(typelen == 1) {/* so |S1 => NC_CHAR */ + if(purezarr || endianness == NC_ENDIAN_NATIVE) nctype = NC_CHAR; + } else + nctype = NC_STRING; } - break; - case 2: +#if 0 + } else if(tchar == 'U') {/*back compatibility*/ + if(purezarr || typelen != 1) goto zerr; + nctype = NC_CHAR; +#endif + } else { + switch(typelen) { + case 1: + switch (tchar) { + case 'i': nctype = NC_BYTE; break; + case 'u': nctype = NC_UBYTE; break; + default: goto zerr; + } + break; + case 2: switch (tchar) { case 'i': nctype = NC_SHORT; break; case 'u': nctype = NC_USHORT; break; default: goto zerr; } break; - case 4: + case 4: switch (tchar) { case 'i': nctype = NC_INT; break; case 'u': nctype = NC_UINT; break; @@ -544,7 +627,7 @@ ncz_dtype2typeinfo(const char* dtype, nc_type* nctypep, int* endianp) default: goto zerr; } break; - case 8: + case 8: switch (tchar) { case 'i': nctype = NC_INT64; break; case 'u': nctype = NC_UINT64; break; @@ -552,10 +635,16 @@ ncz_dtype2typeinfo(const char* dtype, nc_type* nctypep, int* endianp) default: goto zerr; } break; - default: goto zerr; + default: goto zerr; + } } + /* Convert NC_ENDIAN_NATIVE and NC_ENDIAN_NA */ + if(endianness == NC_ENDIAN_NATIVE) + endianness = (NC_isLittleEndian()?NC_ENDIAN_LITTLE:NC_ENDIAN_BIG); + if(nctypep) *nctypep = 
nctype; + if(typelenp) *typelenp = typelen; if(endianp) *endianp = endianness; done: @@ -565,7 +654,104 @@ zerr: goto done; } +/* Infer the attribute's type based +primarily on the first atomic value encountered +recursively. +*/ +int +NCZ_inferattrtype(NCjson* value, nc_type typehint, nc_type* typeidp) +{ + int i,stat = NC_NOERR; + nc_type typeid; + NCjson* j = NULL; + unsigned long long u64; + long long i64; + int negative = 0; + if(NCJsort(value) == NCJ_ARRAY && NCJlength(value) == 0) + {typeid = NC_NAT; goto done;} /* Empty array is illegal */ + + if(NCJsort(value) == NCJ_NULL) + {typeid = NC_NAT; goto done;} /* NULL is also illegal */ + + if(NCJsort(value) == NCJ_DICT) /* Complex JSON expr -- a dictionary */ + {typeid = NC_NAT; goto done;} + + /* If an array, make sure all the elements are simple */ + if(value->sort == NCJ_ARRAY) { + for(i=0;i<NCJlength(value);i++) { + j=NCJith(value,i); + if(!NCJisatomic(j)) + {typeid = NC_NAT; goto done;} + } + } + + /* Infer from first element */ + if(value->sort == NCJ_ARRAY) { + j=NCJith(value,0); + return NCZ_inferattrtype(j,typehint,typeidp); + } + + /* At this point, value is a primitive JSON Value */ + + switch (NCJsort(value)) { + case NCJ_NULL: + typeid = NC_NAT; + return NC_NOERR; + case NCJ_DICT: + typeid = NC_CHAR; + goto done; + case NCJ_UNDEF: + return NC_EINVAL; + default: /* atomic */ + break; + } + + if(NCJstring(value) != NULL) + negative = (NCJstring(value)[0] == '-'); + switch (value->sort) { + case NCJ_INT: + if(negative) { + sscanf(NCJstring(value),"%lld",&i64); + u64 = (unsigned long long)i64; + } else + sscanf(NCJstring(value),"%llu",&u64); + typeid = NCZ_inferinttype(u64,negative); + break; + case NCJ_DOUBLE: + typeid = NC_DOUBLE; + break; + case NCJ_BOOLEAN: + typeid = NC_UBYTE; + break; + case NCJ_STRING: /* requires special handling as an array of characters */ + typeid = NC_CHAR; + break; + default: + stat = NC_ENCZARR; + } +done: + if(typeidp) *typeidp = typeid; + return stat; +} + +/* Infer the 
int type from the value; + minimum type will be int. +*/ +int +NCZ_inferinttype(unsigned long long u64, int negative) +{ + long long i64 = (long long)u64; /* keep bit pattern */ + if(!negative && u64 >= NC_MAX_INT64) return NC_UINT64; + if(i64 < 0) { + if(i64 >= NC_MIN_INT) return NC_INT; + return NC_INT64; + } + if(i64 <= NC_MAX_INT) return NC_INT; + if(i64 <= NC_MAX_UINT) return NC_UINT; + return NC_INT64; +} + /** @internal Similar to NCZ_grppath, but using group ids. @param gid - [in] group id @@ -788,3 +974,139 @@ done: if(dst) (void)nc_reclaim_data_all(ncid,tid,dst,1); return stat; } + + +/* Get max str len for a variable or grp */ +/* Has side effect of setting values in the + internal data structures */ +int +NCZ_get_maxstrlen(NC_OBJ* obj) +{ + int maxstrlen = 0; + assert(obj->sort == NCGRP || obj->sort == NCVAR); + if(obj->sort == NCGRP) { + NC_GRP_INFO_T* grp = (NC_GRP_INFO_T*)obj; + NC_FILE_INFO_T* file = grp->nc4_info; + NCZ_FILE_INFO_T* zfile = (NCZ_FILE_INFO_T*)file->format_file_info; + if(zfile->default_maxstrlen == 0) + zfile->default_maxstrlen = NCZ_MAXSTR_DEFAULT; + maxstrlen = zfile->default_maxstrlen; + } else { /*(obj->sort == NCVAR)*/ + NC_VAR_INFO_T* var = (NC_VAR_INFO_T*)obj; + NCZ_VAR_INFO_T* zvar = (NCZ_VAR_INFO_T*)var->format_var_info; + if(zvar->maxstrlen == 0) + zvar->maxstrlen = NCZ_get_maxstrlen((NC_OBJ*)var->container); + maxstrlen = zvar->maxstrlen; + } + return maxstrlen; +} + +int +NCZ_fixed2char(const void* fixed, char** charp, size_t count, int maxstrlen) +{ + size_t i; + unsigned char* sp = NULL; + const unsigned char* p = fixed; + memset((void*)charp,0,sizeof(char*)*count); + for(i=0;i<count;i++,p+=maxstrlen) { + if(p[0] == '\0') { + sp = NULL; + } else { + if((sp = (unsigned char*)malloc(maxstrlen+1))==NULL) /* ensure null terminated */ + return NC_ENOMEM; + memcpy(sp,p,maxstrlen); + sp[maxstrlen] = '\0'; + } + charp[i] = sp; + sp = NULL; + } + return NC_NOERR; +} + +int +NCZ_char2fixed(const char** charp, void* fixed, size_t 
count, int maxstrlen) +{ + size_t i; + unsigned char* p = fixed; + memset(fixed,0,maxstrlen*count); /* clear target */ + for(i=0;i<count;i++,p+=maxstrlen) { + size_t len; + if(charp[i] != NULL) { + len = strlen(charp[i]); + if(len > maxstrlen) len = maxstrlen; + memcpy(p,charp[i],len); + } else { + memset(p,'\0',maxstrlen); + } + } + return NC_NOERR; +} + +/* +Wrap NC_copy_data, but take string value into account when overwriting +*/ +int +NCZ_copy_data(NC_FILE_INFO_T* file, NC_TYPE_INFO_T* xtype, const void* memory, size_t count, int noclear, void* copy) +{ + if(xtype->hdr.id == NC_STRING && !noclear) { + size_t i; + char** scopy = (char**)copy; + /* Reclaim any string fill values in copy */ + for(i=0;i<count;i++) { + nullfree(scopy[i]); + scopy[i] = NULL; + } + } + return nc_copy_data(file->controller->ext_ncid,xtype->hdr.id,memory,count,copy); +} + +#if 0 +/* Recursive helper */ +static int +checksimplejson(NCjson* json, int depth) +{ + int i; + + switch (NCJsort(json)) { + case NCJ_ARRAY: + if(depth > 0) return 0; /* e.g. [...,[...],...] or [...,{...},...] 
*/ + for(i=0;i < NCJlength(json);i++) { + NCjson* j = NCJith(json,i); + if(!checksimplejson(j,depth+1)) return 0; + } + break; + case NCJ_DICT: + case NCJ_NULL: + case NCJ_UNDEF: + return 0; + default: break; + } + return 1; +} +#endif + +/* Return 1 if the attribute will be stored as a complex JSON valued attribute; return 0 otherwise */ +int +NCZ_iscomplexjson(NCjson* json, nc_type typehint) +{ + int i, stat = 0; + + switch (NCJsort(json)) { + case NCJ_ARRAY: + /* If the typehint is NC_CHAR, then always treat it as complex */ + if(typehint == NC_CHAR) {stat = 1; goto done;} + /* Otherwise see if it is a simple vector of atomic values */ + for(i=0;i < NCJlength(json);i++) { + NCjson* j = NCJith(json,i); + if(!NCJisatomic(j)) {stat = 1; goto done;} + } + break; + case NCJ_DICT: + case NCJ_NULL: + case NCJ_UNDEF: + stat = 1; goto done; + default: break; + } +done: + return stat; +} diff --git a/libnczarr/zvar.c b/libnczarr/zvar.c index da4ebba6f..dd6311346 100644 --- a/libnczarr/zvar.c +++ b/libnczarr/zvar.c @@ -12,6 +12,11 @@ #include "zincludes.h" #include <math.h> /* For pow() used below. */ +/* Mnemonics */ +#define CREATE 0 +#define NOCREATE 1 + + #ifdef LOGGING static void reportchunking(const char* title, NC_VAR_INFO_T* var) @@ -107,10 +112,7 @@ ncz_find_default_chunksizes2(NC_GRP_INFO_T *grp, NC_VAR_INFO_T *var) double total_chunk_size; #endif - if (var->type_info->nc_type_class == NC_STRING) - type_size = sizeof(char *); - else - type_size = var->type_info->size; + type_size = var->type_info->size; #ifdef LOGGING /* Later this will become the total number of bytes in the default @@ -406,7 +408,7 @@ var->type_info->rc++; /* Set variables no_fill to match the database default unless the * variable type is variable length (NC_STRING or NC_VLEN) or is * user-defined type. */ - if (var->type_info->nc_type_class < NC_STRING) + if (var->type_info->nc_type_class <= NC_STRING) var->no_fill = (h5->fill_mode == NC_NOFILL); /* Assign dimensions to the variable. 
At the same time, check to @@ -473,7 +475,6 @@ exit: if (type) if ((retval = nc4_type_free(type))) BAILLOG(retval); - return ZUNTRACE(retval); } diff --git a/libnczarr/zwalk.c b/libnczarr/zwalk.c index 87f490c59..70d879a49 100644 --- a/libnczarr/zwalk.c +++ b/libnczarr/zwalk.c @@ -128,10 +128,16 @@ NCZ_transferslice(NC_VAR_INFO_T* var, int reading, common.swap = (zfile->native_endianness == var->endianness ? 0 : 1); common.chunkcount = 1; - for(r=0;r<common.rank+common.scalar;r++) { - if(common.scalar) - dimlens[r] = 1; - else + if(common.scalar) { + dimlens[0] = 1; + chunklens[0] = var->chunksizes[0]; + slices[0].start = 0; + slices[0].stride = 1; + slices[0].stop = 0; + slices[0].len = 1; + common.chunkcount = 1; + memshape[0] = 1; + } else for(r=0;r<common.rank;r++) { dimlens[r] = var->dim[r]->len; chunklens[r] = var->chunksizes[r]; slices[r].start = start[r]; @@ -221,7 +227,7 @@ NCZ_transfer(struct Common* common, NCZSlice* slices) if((stat=wholechunk_indices(common,slices,chunkindices))) goto done; if(wdebug >= 1) fprintf(stderr,"case: wholechunk: chunkindices: %s\n",nczprint_vector(common->rank,chunkindices)); - /* Read the chunk */ + /* Read the chunk; handles fixed vs char* strings*/ switch ((stat = common->reader.read(common->reader.source, chunkindices, &chunkdata))) { case NC_EEMPTY: /* cache created the chunk */ break; @@ -232,9 +238,9 @@ NCZ_transfer(struct Common* common, NCZSlice* slices) memptr = ((unsigned char*)common->memory); slpptr = ((unsigned char*)chunkdata); if(common->reading) { - memcpy(memptr,slpptr,common->chunkcount*common->typesize); + if((stat=NCZ_copy_data(common->file,common->var->type_info,slpptr,common->chunkcount,!ZCLEAR,memptr))) goto done; } else { - memcpy(slpptr,memptr,common->chunkcount*common->typesize); + if((stat=NCZ_copy_data(common->file,common->var->type_info,memptr,common->chunkcount,ZCLEAR,slpptr))) goto done; } // transfern(common,slpptr,memptr,common->chunkcount,1,chunkdata); if(zutest && zutest->tests & 
UTEST_WHOLECHUNK) @@ -410,15 +416,16 @@ NCZ_walk(NCZProjection** projv, NCZOdometer* chunkodom, NCZOdometer* slpodom, NC if(slpavail > 0) { if(wdebug > 0) wdebug2(common,slpptr0,memptr0,slpavail,laststride,chunkdata); if(common->reading) { - memcpy(memptr0,slpptr0,slpavail*common->typesize); + if((stat=NCZ_copy_data(common->file,common->var->type_info,slpptr0,slpavail,!ZCLEAR,memptr0))) goto done; } else { - memcpy(slpptr0,memptr0,slpavail*common->typesize); + if((stat=NCZ_copy_data(common->file,common->var->type_info,memptr0,slpavail,ZCLEAR,slpptr0))) goto done; } } // if((stat = transfern(common,slpptr0,memptr0,avail,nczodom_laststride(slpodom),chunkdata)))goto done; nczodom_next(memodom); nczodom_next(slpodom); } +done: return stat; } @@ -734,10 +741,11 @@ NCZ_transferscalar(struct Common* common) /* Figure out memory address */ memptr = ((unsigned char*)common->memory); slpptr = ((unsigned char*)chunkdata); - if(common->reading) - memcpy(memptr,slpptr,common->chunkcount*common->typesize); - else - memcpy(slpptr,memptr,common->chunkcount*common->typesize); + if(common->reading) { + if((stat=NCZ_copy_data(common->file,common->var->type_info,slpptr,common->chunkcount,!ZCLEAR,memptr))) goto done; + } else { + if((stat=NCZ_copy_data(common->file,common->var->type_info,memptr,common->chunkcount,ZCLEAR,slpptr))) goto done; + } done: return stat; @@ -749,7 +757,7 @@ NCZ_read_chunk(int ncid, int varid, size64_t* zindices, void* chunkdata) { int stat = NC_NOERR; NC_VAR_INFO_T* var = NULL; - NCZ_VAR_INFO_T* zvar; + NCZ_VAR_INFO_T* zvar = NULL; struct NCZChunkCache* cache = NULL; void* cachedata = NULL; @@ -759,8 +767,9 @@ NCZ_read_chunk(int ncid, int varid, size64_t* zindices, void* chunkdata) cache = zvar->cache; if((stat = NCZ_read_cache_chunk(cache,zindices,&cachedata))) goto done; - if(chunkdata) - memcpy(chunkdata,cachedata,cache->chunksize); + if(chunkdata) { + if((stat = nc_copy_data(ncid,var->type_info->hdr.id,cachedata,cache->chunkcount,chunkdata))) goto done; + 
} done: return stat; diff --git a/libnczarr/zxcache.c b/libnczarr/zxcache.c index 52f8abf2e..374890be6 100644 --- a/libnczarr/zxcache.c +++ b/libnczarr/zxcache.c @@ -202,9 +202,14 @@ done: } static void -free_cache_entry(NCZCacheEntry* entry) +free_cache_entry(NCZChunkCache* cache, NCZCacheEntry* entry) { if(entry) { + int tid = cache->var->type_info->hdr.id; + if(tid == NC_STRING && !entry->isfixedstring) { + int ncid = cache->var->container->nc4_info->controller->ext_ncid; + nc_reclaim_data(ncid,tid,entry->data,cache->chunkcount); + } nullfree(entry->data); nullfree(entry->key.varkey); nullfree(entry->key.chunkkey); @@ -225,7 +230,7 @@ NCZ_free_chunk_cache(NCZChunkCache* cache) NCZCacheEntry* entry = nclistremove(cache->mru,0); (void)ncxcacheremove(cache->xcache,entry->hashkey,&ptr); assert(ptr == entry); - free_cache_entry(entry); + free_cache_entry(cache,entry); } #ifdef DEBUG fprintf(stderr,"|cache.free|=%ld\n",nclistlength(cache->mru)); @@ -303,7 +308,7 @@ fprintf(stderr,"|cache.read.lru|=%ld\n",nclistlength(cache->mru)); done: if(created && stat == NC_NOERR) stat = NC_EEMPTY; /* tell upper layers */ - if(entry) free_cache_entry(entry); + if(entry) free_cache_entry(cache,entry); return THROW(stat); } @@ -343,7 +348,7 @@ fprintf(stderr,"|cache.write|=%ld\n",nclistlength(cache->mru)); if((stat=makeroom(cache))) goto done; done: - if(entry) free_cache_entry(entry); + if(entry) free_cache_entry(cache,entry); return THROW(stat); } #endif @@ -449,6 +454,7 @@ NCZ_ensure_fill_chunk(NCZChunkCache* cache) { int i, stat = NC_NOERR; NC_VAR_INFO_T* var = cache->var; + nc_type typeid = var->type_info->hdr.id; size_t typesize = var->type_info->size; if(cache->fillchunk) goto done; @@ -461,6 +467,11 @@ NCZ_ensure_fill_chunk(NCZChunkCache* cache) goto done; } if((stat = NCZ_ensure_fill_value(var))) goto done; + if(typeid == NC_STRING) { + char* src = *((char**)(var->fill_value)); + char** dst = (char**)(cache->fillchunk); + for(i=0;i<cache->chunkcount;i++) dst[i] = 
strdup(src); + } else switch (typesize) { case 1: { unsigned char c = *((unsigned char*)var->fill_value); @@ -597,6 +608,9 @@ put_chunk(NCZChunkCache* cache, NCZCacheEntry* entry) NCZ_FILE_INFO_T* zfile = NULL; NCZMAP* map = NULL; char* path = NULL; + nc_type tid = NC_NAT; + void* strchunk = NULL; + int ncid = 0; ZTRACE(5,"cache.var=%s entry.key=%s",cache->var->hdr.name,entry->key); LOG((3, "%s: var: %p", __func__, cache->var)); @@ -605,6 +619,26 @@ put_chunk(NCZChunkCache* cache, NCZCacheEntry* entry) zfile = file->format_file_info; map = zfile->map; + /* Collect some info */ + ncid = file->controller->ext_ncid; + tid = cache->var->type_info->hdr.id; + + if(tid == NC_STRING && !entry->isfixedstring) { + /* Convert from char* to char[strlen] format */ + int maxstrlen = NCZ_get_maxstrlen((NC_OBJ*)cache->var); + assert(maxstrlen > 0); + if((strchunk = malloc(cache->chunkcount*maxstrlen))==NULL) {stat = NC_ENOMEM; goto done;} + /* copy char* to char[] format */ + if((stat = NCZ_char2fixed((const char**)entry->data,strchunk,cache->chunkcount,maxstrlen))) goto done; + /* Reclaim the old chunk */ + if((stat = nc_reclaim_data_all(ncid,tid,entry->data,cache->chunkcount))) goto done; + entry->data = NULL; + entry->data = strchunk; strchunk = NULL; + entry->size = cache->chunkcount * maxstrlen; + entry->isfixedstring = 1; + } + + #ifdef ENABLE_NCZARR_FILTERS /* Make sure the entry is in filtered state */ if(!entry->isfiltered) { @@ -636,6 +670,7 @@ put_chunk(NCZChunkCache* cache, NCZCacheEntry* entry) default: goto done; } done: + nullfree(strchunk); nullfree(path); return ZUNTRACE(stat); } @@ -657,9 +692,12 @@ get_chunk(NCZChunkCache* cache, NCZCacheEntry* entry) NCZMAP* map = NULL; NC_FILE_INFO_T* file = NULL; NCZ_FILE_INFO_T* zfile = NULL; + NC_TYPE_INFO_T* xtype = NULL; + char** strchunk = NULL; size64_t size; int empty = 0; char* path = NULL; + int tid; ZTRACE(5,"cache.var=%s entry.key=%s sep=%d",cache->var->hdr.name,entry->key,cache->dimension_separator); @@ -670,22 
+708,24 @@ get_chunk(NCZChunkCache* cache, NCZCacheEntry* entry) map = zfile->map; assert(map); + /* Collect some info */ + xtype = cache->var->type_info; + tid = xtype->hdr.id; + /* get size of the "raw" data on "disk" */ path = NCZ_chunkpath(entry->key); stat = nczmap_len(map,path,&size); nullfree(path); path = NULL; switch(stat) { - case NC_NOERR: break; + case NC_NOERR: entry->size = size; break; case NC_EEMPTY: empty = 1; stat = NC_NOERR; break; default: goto done; } if(!empty) { /* Make sure we have a place to read it */ - entry->size = size; - entry->isfiltered = FILTERED(cache); /* Is the data being read filtered? */ - if((entry->data = (void*)malloc(entry->size)) == NULL) - {stat = NC_ENOMEM; goto done;} + if((entry->data = (void*)calloc(1,entry->size)) == NULL) + {stat = NC_ENOMEM; goto done;} /* Read the raw data */ path = NCZ_chunkpath(entry->key); stat = nczmap_read(map,path,0,entry->size,(char*)entry->data); @@ -695,27 +735,32 @@ get_chunk(NCZChunkCache* cache, NCZCacheEntry* entry) case NC_EEMPTY: empty = 1; stat = NC_NOERR;break; default: goto done; } + entry->isfiltered = FILTERED(cache); /* Is the data being read filtered? 
*/ + if(tid == NC_STRING) + entry->isfixedstring = 1; /* fill cache is in char[maxstrlen] format */ } if(empty) { /* fake the chunk */ entry->modified = (file->no_write?0:1); entry->size = cache->chunksize; - if((entry->data = (void*)malloc(entry->size)) == NULL) - {stat = NC_ENOMEM; goto done;} + entry->data = NULL; + entry->isfixedstring = 0; + entry->isfiltered = 0; /* apply fill value */ if(cache->fillchunk == NULL) {if((stat = NCZ_ensure_fill_chunk(cache))) goto done;} - memcpy(entry->data,cache->fillchunk,entry->size); - entry->isfiltered = 0; + if((entry->data = calloc(1,entry->size))==NULL) {stat = NC_ENOMEM; goto done;} + if((stat = NCZ_copy_data(file,xtype,cache->fillchunk,cache->chunkcount,!ZCLEAR,entry->data))) goto done; stat = NC_NOERR; } #ifdef ENABLE_NCZARR_FILTERS /* Make sure the entry is in unfiltered state */ - if(entry->isfiltered) { + if(!empty && entry->isfiltered) { NC_VAR_INFO_T* var = cache->var; void* unfiltered = NULL; /* pointer to the unfiltered data */ void* filtered = NULL; /* pointer to the filtered data */ size_t unflen; /* length of unfiltered data */ + assert(tid != NC_STRING || entry->isfixedstring); /* Get the filter chain to apply */ NClist* filterchain = (NClist*)var->filters; if(nclistlength(filterchain) == 0) {stat = NC_EFILTER; goto done;} @@ -730,7 +775,24 @@ get_chunk(NCZChunkCache* cache, NCZCacheEntry* entry) } #endif + if(tid == NC_STRING && entry->isfixedstring) { + /* Convert from char[strlen] to char* format */ + int maxstrlen = NCZ_get_maxstrlen((NC_OBJ*)cache->var); + assert(maxstrlen > 0); + /* copy char[] to char* format */ + if((strchunk = (char**)malloc(sizeof(char*)*cache->chunkcount))==NULL) + {stat = NC_ENOMEM; goto done;} + if((stat = NCZ_fixed2char(entry->data,strchunk,cache->chunkcount,maxstrlen))) goto done; + /* Reclaim the old chunk */ + nullfree(entry->data); + entry->data = NULL; + entry->data = strchunk; strchunk = NULL; + entry->size = cache->chunkcount * sizeof(char*); + entry->isfixedstring = 
0; + } + done: + nullfree(strchunk); nullfree(path); return ZUNTRACE(stat); } diff --git a/libsrc/posixio.c b/libsrc/posixio.c index 5e541faac..31dbf771b 100644 --- a/libsrc/posixio.c +++ b/libsrc/posixio.c @@ -15,6 +15,7 @@ #include <stdlib.h> #include <errno.h> #include <string.h> +#include <stdint.h> #ifdef HAVE_FCNTL_H #include <fcntl.h> @@ -120,7 +121,7 @@ static off_t nc_get_filelen(const int fd) { off_t flen; #ifdef HAVE_FILE_LENGTH_I64 - __int64 file_len = 0; + int64_t file_len = 0; if ((file_len = _filelengthi64(fd)) < 0) { return file_len; } @@ -1829,7 +1830,7 @@ ncio_px_filesize(ncio *nciop, off_t *filesizep) Use _filelengthi64 isntead. */ #ifdef HAVE_FILE_LENGTH_I64 - __int64 file_len = 0; + int64_t file_len = 0; if( (file_len = _filelengthi64(nciop->fd)) < 0) { return errno; } diff --git a/libsrc4/nc4internal.c b/libsrc4/nc4internal.c index a4ead8f80..bcb1d9af3 100644 --- a/libsrc4/nc4internal.c +++ b/libsrc4/nc4internal.c @@ -35,22 +35,24 @@ */ /** @internal List of reserved attributes. - WARNING: This list must be in sorted order for binary search. */ + WARNING: This list must be in (strcmp) sorted order for binary search. 
*/ static const NC_reservedatt NC_reserved[] = { {NC_ATT_CLASS, READONLYFLAG|HIDDENATTRFLAG}, /*CLASS*/ {NC_ATT_DIMENSION_LIST, READONLYFLAG|HIDDENATTRFLAG}, /*DIMENSION_LIST*/ {NC_ATT_NAME, READONLYFLAG|HIDDENATTRFLAG}, /*NAME*/ {NC_ATT_REFERENCE_LIST, READONLYFLAG|HIDDENATTRFLAG}, /*REFERENCE_LIST*/ - {NC_XARRAY_DIMS, READONLYFLAG|HIDDENATTRFLAG}, /*_ARRAY_DIMENSIONS*/ - {NC_ATT_CODECS, VARFLAG|READONLYFLAG|NAMEONLYFLAG|HIDDENATTRFLAG}, /*_Codecs*/ + {NC_XARRAY_DIMS, READONLYFLAG|NAMEONLYFLAG|HIDDENATTRFLAG}, /*_ARRAY_DIMENSIONS*/ + {NC_ATT_CODECS, VARFLAG|READONLYFLAG|NAMEONLYFLAG}, /*_Codecs*/ {NC_ATT_FORMAT, READONLYFLAG}, /*_Format*/ {ISNETCDF4ATT, READONLYFLAG|NAMEONLYFLAG}, /*_IsNetcdf4*/ - {NCPROPS, READONLYFLAG|NAMEONLYFLAG|MATERIALIZEDFLAG}, /*_NCProperties*/ - {NC_NCZARR_ATTR, READONLYFLAG|HIDDENATTRFLAG}, /*_NCZARR_ATTR*/ - {NC_ATT_COORDINATES, READONLYFLAG|HIDDENATTRFLAG|MATERIALIZEDFLAG}, /*_Netcdf4Coordinates*/ - {NC_ATT_DIMID_NAME, READONLYFLAG|HIDDENATTRFLAG|MATERIALIZEDFLAG}, /*_Netcdf4Dimid*/ + {NCPROPS,READONLYFLAG|NAMEONLYFLAG|HIDDENATTRFLAG}, /*_NCProperties*/ + {NC_NCZARR_ATTR_UC, READONLYFLAG|HIDDENATTRFLAG}, /*_NCZARR_ATTR */ + {NC_ATT_COORDINATES, READONLYFLAG|HIDDENATTRFLAG}, /*_Netcdf4Coordinates*/ + {NC_ATT_DIMID_NAME, READONLYFLAG|HIDDENATTRFLAG}, /*_Netcdf4Dimid*/ {SUPERBLOCKATT, READONLYFLAG|NAMEONLYFLAG}, /*_SuperblockVersion*/ - {NC_ATT_NC3_STRICT_NAME, READONLYFLAG|MATERIALIZEDFLAG}, /*_nc3_strict*/ + {NC_ATT_NC3_STRICT_NAME, READONLYFLAG}, /*_nc3_strict*/ + {NC_ATT_NC3_STRICT_NAME, READONLYFLAG}, /*_nc3_strict*/ + {NC_NCZARR_ATTR, READONLYFLAG|HIDDENATTRFLAG}, /*_nczarr_attr */ }; #define NRESERVED (sizeof(NC_reserved) / sizeof(NC_reservedatt)) /*|NC_reservedatt|*/ @@ -1592,7 +1594,7 @@ nc4_rec_grp_del_att_data(NC_GRP_INFO_T *grp) LOG((3, "%s: grp->name %s", __func__, grp->hdr.name)); /* Recursively call this function for each child, if any, stopping - * if there is an error. */ + * if there is an error. 
*/ for (i = 0; i < ncindexsize(grp->children); i++) if ((retval = nc4_rec_grp_del_att_data((NC_GRP_INFO_T *)ncindexith(grp->children, i)))) return retval; diff --git a/nc_test/CMakeLists.txt b/nc_test/CMakeLists.txt index 96c7b6237..61bff2807 100644 --- a/nc_test/CMakeLists.txt +++ b/nc_test/CMakeLists.txt @@ -107,8 +107,10 @@ IF(BUILD_UTILITIES) ENDIF() IF(ENABLE_BYTERANGE) + IF(ENABLE_EXTERNAL_SERVER_TESTS) build_bin_test_no_prefix(tst_byterange) add_sh_test(nc_test test_byterange) + ENDIF() ENDIF() IF(BUILD_MMAP) diff --git a/nc_test/Makefile.am b/nc_test/Makefile.am index c72bd2300..e7e7e5dfc 100644 --- a/nc_test/Makefile.am +++ b/nc_test/Makefile.am @@ -75,26 +75,27 @@ endif TESTS = $(TESTPROGRAMS) -if ENABLE_BYTERANGE if BUILD_UTILITIES + +if ENABLE_BYTERANGE +if ENABLE_EXTERNAL_SERVER_TESTS tst_byterange_SOURCES = tst_byterange.c check_PROGRAMS += tst_byterange TESTS += test_byterange.sh endif endif -if BUILD_UTILITIES TESTS += run_diskless.sh run_diskless5.sh run_inmemory.sh if LARGE_FILE_TESTS if ! ENABLE_PARALLEL TESTS += run_diskless2.sh endif endif + if BUILD_MMAP TESTS += run_mmap.sh run_mmap.log: run_diskless.log endif -endif # If pnetcdf is enabled, these tests are run by a test # script. Otherwise, the are run by automake in the usual way. @@ -104,6 +105,8 @@ else TESTS += t_nc tst_atts3 tst_nofill nc_test tst_small endif +endif # BUILD_UTILITIES + # The .c files that are generated with m4 are already distributed, but # we also include the original m4 files, plus test scripts data. 
EXTRA_DIST = test_get.m4 test_put.m4 run_diskless.sh run_diskless2.sh \ diff --git a/nc_test/test_byterange.sh b/nc_test/test_byterange.sh index 28b5e6374..8c13cd348 100755 --- a/nc_test/test_byterange.sh +++ b/nc_test/test_byterange.sh @@ -5,11 +5,14 @@ if test "x$srcdir" = x ; then srcdir=`pwd`; fi set -e +# Note: thredds-test is currently down and the URLs need to be replaced + # Test Urls +if test "x$FEATURE_THREDDSTEST" = x1 ; then URL3="https://thredds-test.unidata.ucar.edu/thredds/fileServer/pointData/cf_dsg/example/point.nc#mode=bytes&aws.profile=none" -#URL3a="https://remotetest.unidata.ucar.edu/thredds/fileServer/testdata/2004050300_eta_211.nc#bytes&aws.profile=none" -URL4a="https://s3.us-east-1.amazonaws.com/noaa-goes16/ABI-L1b-RadC/2017/059/03/OR_ABI-L1b-RadC-M3C13_G16_s20170590337505_e20170590340289_c20170590340316.nc#mode=bytes&aws.profile=none" URL4b="https://thredds-test.unidata.ucar.edu/thredds/fileServer/irma/metar/files/METAR_20170910_0000.nc#bytes&aws.profile=none" +fi +URL4a="https://s3.us-east-1.amazonaws.com/noaa-goes16/ABI-L1b-RadC/2017/059/03/OR_ABI-L1b-RadC-M3C13_G16_s20170590337505_e20170590340289_c20170590340316.nc#mode=bytes&aws.profile=none" URL4c="s3://noaa-goes16/ABI-L1b-RadC/2017/059/03/OR_ABI-L1b-RadC-M3C13_G16_s20170590337505_e20170590340289_c20170590340316.nc#mode=bytes&aws.profile=none" # Requires auth URL3b="s3://unidata-zarr-test-data/byterangefiles/upload3.nc#bytes&aws.profile=unidata" @@ -81,12 +84,16 @@ testsetup https://s3.us-east-1.amazonaws.com/unidata-zarr-test-data echo "*** Testing reading NetCDF-3 file with http" -echo "***Test remote classic file" -testbytes nc3 classic "$URL3" +if test "x$FEATURE_THREDDSTEST" = x1 ; then + echo "***Test remote classic file" + testbytes nc3 classic "$URL3" +fi if test "x$FEATURE_HDF5" = xyes ; then - echo "***Test remote netcdf-4 file: non-s3" - testbytes nc4b netCDF-4 "$URL4b" + if test "x$FEATURE_THREDDSTEST" = x1 ; then + echo "***Test remote netcdf-4 file: non-s3" + testbytes 
nc4b netCDF-4 "$URL4b" + fi fi if test "x$FEATURE_S3TESTS" = xyes && test "x$FEATURE_HDF5" = xyes ; then echo "***Test remote netdf-4 file: s3" diff --git a/ncdap_test/CMakeLists.txt b/ncdap_test/CMakeLists.txt index 8c4f1af31..ed07697e7 100644 --- a/ncdap_test/CMakeLists.txt +++ b/ncdap_test/CMakeLists.txt @@ -52,14 +52,14 @@ IF(ENABLE_TESTS) # not yet add_sh_test(ncdap tst_hyrax) add_sh_test(ncdap tst_fillmismatch) IF(ENABLE_DAP_LONG_TESTS) - add_sh_test(ncdap tst_longremote3) - add_bin_test(ncdap test_manyurls) - SET_TESTS_PROPERTIES(ncdap_tst_longremote3 ncdap_test_manyurls PROPERTIES RUN_SERIAL TRUE) + add_sh_test(ncdap tst_longremote3) + SET_TESTS_PROPERTIES(ncdap_tst_longremote3 PROPERTIES RUN_SERIAL TRUE) ENDIF(ENABLE_DAP_LONG_TESTS) - - - ENDIF(BUILD_UTILITIES) + IF(ENABLE_EXTERNAL_SERVER_TESTS) + add_bin_test(ncdap test_manyurls) + SET_TESTS_PROPERTIES(ncdap_test_manyurls PROPERTIES RUN_SERIAL TRUE) + ENDIF() IF(ENABLE_DAP_AUTH_TESTS) ##obsolete add_bin_test(ncdap t_auth) diff --git a/ncdap_test/Makefile.am b/ncdap_test/Makefile.am index 8cd39e51a..3d045b5ac 100644 --- a/ncdap_test/Makefile.am +++ b/ncdap_test/Makefile.am @@ -52,10 +52,12 @@ TESTS += test_partvar if ENABLE_DAP_LONG_TESTS TESTS += tst_longremote3.sh tst_longremote3.log: tst_remote3.log +if ENABLE_EXTERNAL_SERVER_TESTS test_manyurls_SOURCES = test_manyurls.c manyurls.h check_PROGRAMS += test_manyurls - TESTS += test_manyurls test_manyurls.log: tst_longremote3.log + TESTS += test_manyurls +endif endif test_partvar_SOURCES = test_partvar.c diff --git a/ncgen/Makefile.am b/ncgen/Makefile.am index 5beb613cb..49a3d6497 100644 --- a/ncgen/Makefile.am +++ b/ncgen/Makefile.am @@ -61,7 +61,7 @@ CLEANFILES = c0.nc c0_64.nc c0_4.nc c0_4c.nc ref_camrun.c \ makeparser:: rm -f ncgenl.c lex.ncg.c - flex -L -Pncg -8 ncgen.l + flex -Pncg -8 ncgen.l sed -e s/lex.ncg.c/ncgenl.c/g <lex.ncg.c >ncgenl.c bison -pncg -t -d ncgen.y rm -f ncgeny.c ncgeny.h diff --git a/ncgen/bindata.c b/ncgen/bindata.c index 
4ed143f3a..49ead4dfc 100644 --- a/ncgen/bindata.c +++ b/ncgen/bindata.c @@ -422,6 +422,7 @@ done: return stat; } +#if 0 /** Internal equivalent of ncaux_reclaim_data. */ @@ -591,5 +592,8 @@ done: } #endif /*USE_NETCDF4*/ +#endif /*0*/ + + #endif /*ENABLE_BINARY*/ diff --git a/ncgen/ncgen.l b/ncgen/ncgen.l index 8d9ae574b..c46fb3863 100644 --- a/ncgen/ncgen.l +++ b/ncgen/ncgen.l @@ -481,7 +481,8 @@ done: return 0; if(tag == NC_NAT) { sprintf(errstr,"Illegal integer suffix: %s",stag); yyerror(errstr); - goto done; + //goto done; + return 0; } yytext[slen - strlen(stag)] = '\0'; if(xlen > 16) { /* truncate hi order digits */ @@ -527,7 +528,7 @@ done: return 0; } return lexdebug(FLOAT_CONST); } -\'[^\\]\' { +\'[^\\]\' { (void) sscanf((char*)&yytext[1],"%c",&byte_val); return lexdebug(BYTE_CONST); } @@ -559,8 +560,9 @@ done: return 0; case 'r': byte_val = '\r'; break; case 't': byte_val = '\t'; break; case 'v': byte_val = '\v'; break; + case '0' : byte_val = '\0'; break; case '\\': byte_val = '\\'; break; - case '?': byte_val = '\177'; break; + case '?': byte_val = '\177'; break; case '\'': byte_val = '\''; break; default: byte_val = (char)yytext[2]; } diff --git a/ncgen/ncgenl.c b/ncgen/ncgenl.c index 2eac82103..6ea3a2270 100644 --- a/ncgen/ncgenl.c +++ b/ncgen/ncgenl.c @@ -1,4 +1,6 @@ +#line 3 "ncgenl.c" + #define YY_INT_ALIGNED short int /* A lexical scanner generated by flex */ @@ -1582,6 +1584,8 @@ int yy_flex_debug = 0; #define YY_MORE_ADJ 0 #define YY_RESTORE_YY_MORE_OFFSET char *yytext; +#line 1 "ncgen.l" +#line 2 "ncgen.l" /********************************************************************* * Copyright 1993, UCAR/Unidata * See netcdf/COPYRIGHT file for copying and redistribution conditions. 
@@ -1727,6 +1731,8 @@ struct Specialtoken specials[] = { {NULL,0} /* null terminate */ }; +#line 1735 "ncgenl.c" + /* The most correct (validating) version of UTF8 character set (Taken from: http://www.w3.org/2005/03/23-lex-U) @@ -1768,6 +1774,7 @@ ID ([A-Za-z_]|{UTF8})([A-Z.@#\[\]a-z_0-9+-]|{UTF8})* /* Note: this definition of string will work for utf8 as well, although it is a very relaxed definition */ +#line 1778 "ncgenl.c" #define INITIAL 0 #define ST_C_COMMENT 1 @@ -1986,6 +1993,9 @@ YY_DECL } { +#line 226 "ncgen.l" + +#line 1999 "ncgenl.c" while ( /*CONSTCOND*/1 ) /* loops until end-of-file is reached */ { @@ -2044,12 +2054,14 @@ do_action: /* This label is used only to access EOF actions. */ case 1: YY_RULE_SETUP +#line 227 "ncgen.l" { /* whitespace */ break; } YY_BREAK case 2: YY_RULE_SETUP +#line 231 "ncgen.l" { /* comment */ break; } @@ -2057,6 +2069,7 @@ YY_RULE_SETUP case 3: /* rule 3 can match eol */ YY_RULE_SETUP +#line 235 "ncgen.l" {int len; char* s = NULL; /* In netcdf4, this will be used in a variety of places, so only remove escapes */ @@ -2080,6 +2093,7 @@ yytext[MAXTRST-1] = '\0'; YY_BREAK case 4: YY_RULE_SETUP +#line 256 "ncgen.l" { /* drop leading 0x; pad to even number of chars */ char* p = yytext+2; int len = yyleng - 2; @@ -2094,115 +2108,143 @@ YY_RULE_SETUP YY_BREAK case 5: YY_RULE_SETUP +#line 268 "ncgen.l" {return lexdebug(COMPOUND);} YY_BREAK case 6: YY_RULE_SETUP +#line 269 "ncgen.l" {return lexdebug(ENUM);} YY_BREAK case 7: YY_RULE_SETUP +#line 270 "ncgen.l" {return lexdebug(OPAQUE_);} YY_BREAK case 8: YY_RULE_SETUP +#line 272 "ncgen.l" {return lexdebug(FLOAT_K);} YY_BREAK case 9: YY_RULE_SETUP +#line 273 "ncgen.l" {return lexdebug(DOUBLE_K);} YY_BREAK case 10: YY_RULE_SETUP +#line 274 "ncgen.l" {return lexdebug(CHAR_K);} YY_BREAK case 11: YY_RULE_SETUP +#line 275 "ncgen.l" {return lexdebug(BYTE_K);} YY_BREAK case 12: YY_RULE_SETUP +#line 276 "ncgen.l" {return lexdebug(SHORT_K);} YY_BREAK case 13: YY_RULE_SETUP +#line 277 "ncgen.l" 
{return lexdebug(INT_K);} YY_BREAK case 14: YY_RULE_SETUP +#line 278 "ncgen.l" {return lexdebug(identcheck(UBYTE_K));} YY_BREAK case 15: YY_RULE_SETUP +#line 279 "ncgen.l" {return lexdebug(identcheck(USHORT_K));} YY_BREAK case 16: YY_RULE_SETUP +#line 280 "ncgen.l" {return lexdebug(identcheck(UINT_K));} YY_BREAK case 17: YY_RULE_SETUP +#line 281 "ncgen.l" {return lexdebug(identcheck(INT64_K));} YY_BREAK case 18: YY_RULE_SETUP +#line 282 "ncgen.l" {return lexdebug(identcheck(UINT64_K));} YY_BREAK case 19: YY_RULE_SETUP +#line 283 "ncgen.l" {return lexdebug(identcheck(STRING_K));} YY_BREAK case 20: YY_RULE_SETUP +#line 285 "ncgen.l" {return lexdebug(FLOAT_K);} YY_BREAK case 21: YY_RULE_SETUP +#line 286 "ncgen.l" {return lexdebug(INT_K);} YY_BREAK case 22: YY_RULE_SETUP +#line 287 "ncgen.l" {return lexdebug(INT_K);} YY_BREAK case 23: YY_RULE_SETUP +#line 288 "ncgen.l" {return lexdebug(identcheck(UINT_K));} YY_BREAK case 24: YY_RULE_SETUP +#line 289 "ncgen.l" {return lexdebug(identcheck(UINT_K));} YY_BREAK case 25: YY_RULE_SETUP +#line 292 "ncgen.l" {int32_val = -1; return lexdebug(NC_UNLIMITED_K);} YY_BREAK case 26: YY_RULE_SETUP +#line 295 "ncgen.l" {return lexdebug(TYPES);} YY_BREAK case 27: YY_RULE_SETUP +#line 296 "ncgen.l" {return lexdebug(DIMENSIONS);} YY_BREAK case 28: YY_RULE_SETUP +#line 297 "ncgen.l" {return lexdebug(VARIABLES);} YY_BREAK case 29: YY_RULE_SETUP +#line 298 "ncgen.l" {return lexdebug(DATA);} YY_BREAK case 30: YY_RULE_SETUP +#line 299 "ncgen.l" {return lexdebug(GROUP);} YY_BREAK case 31: YY_RULE_SETUP +#line 301 "ncgen.l" {BEGIN(TEXT);return lexdebug(NETCDF);} YY_BREAK case 32: YY_RULE_SETUP +#line 303 "ncgen.l" { /* missing value (pre-2.4 backward compatibility) */ if (yytext[0] == '-') { double_val = -INFINITY; @@ -2215,6 +2257,7 @@ YY_RULE_SETUP YY_BREAK case 33: YY_RULE_SETUP +#line 312 "ncgen.l" { /* missing value (pre-2.4 backward compatibility) */ double_val = NAN; specialconstants = 1; @@ -2223,6 +2266,7 @@ YY_RULE_SETUP YY_BREAK case 
34: YY_RULE_SETUP +#line 318 "ncgen.l" {/* missing value (pre-2.4 backward compatibility)*/ if (yytext[0] == '-') { float_val = -INFINITYF; @@ -2235,6 +2279,7 @@ YY_RULE_SETUP YY_BREAK case 35: YY_RULE_SETUP +#line 327 "ncgen.l" { /* missing value (pre-2.4 backward compatibility) */ float_val = NANF; specialconstants = 1; @@ -2243,6 +2288,7 @@ YY_RULE_SETUP YY_BREAK case 36: YY_RULE_SETUP +#line 333 "ncgen.l" { #ifdef USE_NETCDF4 if(l_flag == L_C || l_flag == L_BINARY) @@ -2255,6 +2301,7 @@ YY_RULE_SETUP YY_BREAK case 37: YY_RULE_SETUP +#line 343 "ncgen.l" { bbClear(lextext); bbAppendn(lextext,(char*)yytext,yyleng+1); /* include null */ @@ -2265,6 +2312,7 @@ YY_RULE_SETUP YY_BREAK case 38: YY_RULE_SETUP +#line 352 "ncgen.l" {struct Specialtoken* st; bbClear(lextext); bbAppendn(lextext,(char*)yytext,yyleng+1); /* include null */ @@ -2278,6 +2326,7 @@ YY_RULE_SETUP case 39: /* rule 39 can match eol */ YY_RULE_SETUP +#line 362 "ncgen.l" { int c; char* p; char* q; @@ -2297,6 +2346,7 @@ YY_RULE_SETUP YY_BREAK case 40: YY_RULE_SETUP +#line 379 "ncgen.l" { char* id = NULL; int len; len = strlen(yytext); len = unescape(yytext,len,ISIDENT,&id); @@ -2311,6 +2361,7 @@ YY_RULE_SETUP YY_BREAK case 41: YY_RULE_SETUP +#line 391 "ncgen.l" { /* We need to try to see what size of integer ((u)int). 
@@ -2391,6 +2442,7 @@ done: return 0; YY_BREAK case 42: YY_RULE_SETUP +#line 469 "ncgen.l" { int c; int token = 0; @@ -2406,7 +2458,8 @@ YY_RULE_SETUP if(tag == NC_NAT) { sprintf(errstr,"Illegal integer suffix: %s",stag); yyerror(errstr); - goto done; + //goto done; + return 0; } yytext[slen - strlen(stag)] = '\0'; if(xlen > 16) { /* truncate hi order digits */ @@ -2441,6 +2494,7 @@ YY_RULE_SETUP YY_BREAK case 43: YY_RULE_SETUP +#line 517 "ncgen.l" { if (sscanf((char*)yytext, "%le", &double_val) != 1) { sprintf(errstr,"bad long or double constant: %s",(char*)yytext); @@ -2451,6 +2505,7 @@ YY_RULE_SETUP YY_BREAK case 44: YY_RULE_SETUP +#line 524 "ncgen.l" { if (sscanf((char*)yytext, "%e", &float_val) != 1) { sprintf(errstr,"bad float constant: %s",(char*)yytext); @@ -2462,6 +2517,7 @@ YY_RULE_SETUP case 45: /* rule 45 can match eol */ YY_RULE_SETUP +#line 531 "ncgen.l" { (void) sscanf((char*)&yytext[1],"%c",&byte_val); return lexdebug(BYTE_CONST); @@ -2469,6 +2525,7 @@ YY_RULE_SETUP YY_BREAK case 46: YY_RULE_SETUP +#line 535 "ncgen.l" { int oct = unescapeoct(&yytext[2]); if(oct < 0) { @@ -2481,6 +2538,7 @@ YY_RULE_SETUP YY_BREAK case 47: YY_RULE_SETUP +#line 544 "ncgen.l" { int hex = unescapehex(&yytext[3]); if(byte_val < 0) { @@ -2493,6 +2551,7 @@ YY_RULE_SETUP YY_BREAK case 48: YY_RULE_SETUP +#line 553 "ncgen.l" { switch ((char)yytext[2]) { case 'a': byte_val = '\007'; break; /* not everyone under- @@ -2503,8 +2562,9 @@ YY_RULE_SETUP case 'r': byte_val = '\r'; break; case 't': byte_val = '\t'; break; case 'v': byte_val = '\v'; break; + case '0' : byte_val = '\0'; break; case '\\': byte_val = '\\'; break; - case '?': byte_val = '\177'; break; + case '?': byte_val = '\177'; break; case '\'': byte_val = '\''; break; default: byte_val = (char)yytext[2]; } @@ -2514,6 +2574,7 @@ YY_RULE_SETUP case 49: /* rule 49 can match eol */ YY_RULE_SETUP +#line 572 "ncgen.l" { lineno++ ; break; @@ -2521,6 +2582,7 @@ YY_RULE_SETUP YY_BREAK case 50: YY_RULE_SETUP +#line 577 "ncgen.l" 
{/*initial*/ BEGIN(ST_C_COMMENT); break; @@ -2529,18 +2591,21 @@ YY_RULE_SETUP case 51: /* rule 51 can match eol */ YY_RULE_SETUP +#line 582 "ncgen.l" {/* continuation */ break; } YY_BREAK case 52: YY_RULE_SETUP +#line 586 "ncgen.l" {/* final */ BEGIN(INITIAL); break; } YY_BREAK case YY_STATE_EOF(ST_C_COMMENT): +#line 591 "ncgen.l" {/* final, error */ fprintf(stderr,"unterminated /**/ comment"); BEGIN(INITIAL); @@ -2549,14 +2614,17 @@ case YY_STATE_EOF(ST_C_COMMENT): YY_BREAK case 53: YY_RULE_SETUP +#line 597 "ncgen.l" {/* Note: this next rule will not work for UTF8 characters */ return lexdebug(yytext[0]) ; } YY_BREAK case 54: YY_RULE_SETUP +#line 600 "ncgen.l" ECHO; YY_BREAK +#line 2628 "ncgenl.c" case YY_STATE_EOF(INITIAL): case YY_STATE_EOF(TEXT): yyterminate(); @@ -3562,6 +3630,8 @@ void yyfree (void * ptr ) #define YYTABLES_NAME "yytables" +#line 600 "ncgen.l" + static int lexdebug(int token) { @@ -3660,6 +3730,7 @@ parseULL(int radix, char* text, int* failp) return uint64; } + /** Given the raw bits, the sign char, the tag, and hasU fill in the appropriate *_val field diff --git a/nczarr_test/CMakeLists.txt b/nczarr_test/CMakeLists.txt index ed41e4be3..8e79c8873 100644 --- a/nczarr_test/CMakeLists.txt +++ b/nczarr_test/CMakeLists.txt @@ -116,6 +116,7 @@ IF(ENABLE_TESTS) add_sh_test(nczarr_test run_misc) add_sh_test(nczarr_test run_nczarr_fill) add_sh_test(nczarr_test run_jsonconvention) + add_sh_test(nczarr_test run_strings) BUILD_BIN_TEST(test_quantize ${TSTCOMMONSRC}) add_sh_test(nczarr_test run_quantize) diff --git a/nczarr_test/Makefile.am b/nczarr_test/Makefile.am index ca43d28b3..81f684ea4 100644 --- a/nczarr_test/Makefile.am +++ b/nczarr_test/Makefile.am @@ -62,6 +62,7 @@ TESTS += run_interop.sh TESTS += run_misc.sh TESTS += run_nczarr_fill.sh TESTS += run_jsonconvention.sh +TESTS += run_strings.sh endif @@ -149,7 +150,9 @@ ref_xarray.cdl ref_purezarr.cdl ref_purezarr_base.cdl ref_nczarr2zarr.cdl \ ref_bzip2.cdl ref_filtered.cdl ref_multi.cdl \ 
ref_any.cdl ref_oldformat.cdl ref_oldformat.zip ref_newformatpure.cdl \ ref_groups.h5 ref_byte.zarr.zip ref_byte_fill_value_null.zarr.zip \ -ref_groups_regular.cdl ref_byte.cdl ref_byte_fill_value_null.cdl ref_jsonconvention.cdl +ref_groups_regular.cdl ref_byte.cdl ref_byte_fill_value_null.cdl \ +ref_jsonconvention.cdl ref_jsonconvention.zmap \ +ref_string.cdl # Interoperability files EXTRA_DIST += ref_power_901_constants_orig.zip ref_power_901_constants.cdl ref_quotes_orig.zip ref_quotes.cdl diff --git a/nczarr_test/ref_jsonconvention.cdl b/nczarr_test/ref_jsonconvention.cdl index c4a52b810..187fffd99 100644 --- a/nczarr_test/ref_jsonconvention.cdl +++ b/nczarr_test/ref_jsonconvention.cdl @@ -3,10 +3,16 @@ dimensions: d1 = 1 ; variables: int v(d1) ; - v:varconvention = "{\n\"key1\": [1,2,3], \"key2\": {\"key3\": \"abc\"}}" ; + v:varjson1 = "{\"key1\": [1,2,3], \"key2\": {\"key3\": \"abc\"}}" ; + v:varjson2 = "[[1.0,0.0,0.0],[0.0,1.0,0.0],[0.0,0.0,1.0]]" ; + v:varvec1 = "1.0, 0.0, 0.0" ; + v:varvec2 = "[0.,0.,1.]" ; // global attributes: - :grpconvention = "{\"key1\": [1,2,3], \n\"key2\": {\"key3\": \"abc\"}}" ; + :globalfloat = 1. ; + :globalfloatvec = 1., 2. 
; + :globalchar = "abc" ; + :globalillegal = "[ [ 1.0, 0.0, 0.0 ], [ 0.0, 1.0, 0.0 ], [ 0.0, 0.0, 1.0 " ; data: v = _ ; diff --git a/nczarr_test/ref_jsonconvention.zmap b/nczarr_test/ref_jsonconvention.zmap new file mode 100644 index 000000000..fed6cfba1 --- /dev/null +++ b/nczarr_test/ref_jsonconvention.zmap @@ -0,0 +1,5 @@ +[0] /.zattrs : (354) |{"globalfloat": 1, "globalfloatvec": [1,2], "globalchar": "abc", "globalillegal": "[ [ 1.0, 0.0, 0.0 ], [ 0.0, 1.0, 0.0 ], [ 0.0, 0.0, 1.0 ", "_NCProperties": "version=2,netcdf=4.9.1-development,nczarr=2.0.0", "_nczarr_attr": {"types": {"globalfloat": "<f8", "globalfloatvec": "<f8", "globalchar": "|S1", "globalillegal": "|S1", "_NCProperties": "|S1"}}}| +[1] /.zgroup : (129) |{"zarr_format": 2, "_nczarr_superblock": {"version": "2.0.0"}, "_nczarr_group": {"dims": {"d1": 1}, "vars": ["v"], "groups": []}}| +[3] /v/.zarray : (202) |{"zarr_format": 2, "shape": [1], "dtype": "<i4", "chunks": [1], "fill_value": -2147483647, "order": "C", "compressor": null, "filters": null, "_nczarr_array": {"dimrefs": ["/d1"], "storage": "chunked"}}| +[4] /v/.zattrs : (296) |{"varjson1": {"key1": [1,2,3], "key2": {"key3": "abc"}}, "varjson2": [[1.0,0.0,0.0],[0.0,1.0,0.0],[0.0,0.0,1.0]], "varvec1": "1.0, 0.0, 0.0", "varvec2": [0.,0.,1.], "_ARRAY_DIMENSIONS": ["d1"], "_nczarr_attr": {"types": {"varjson1": "|S1", "varjson2": "|S1", "varvec1": "|S1", "varvec2": "|S1"}}}| +[5] /v/0 : (4) (ubyte) |...| diff --git a/nczarr_test/ref_oldformat.zip b/nczarr_test/ref_oldformat.zip index ef3455eb9..ee423b281 100644 Binary files a/nczarr_test/ref_oldformat.zip and b/nczarr_test/ref_oldformat.zip differ diff --git a/nczarr_test/ref_quotes.cdl b/nczarr_test/ref_quotes.cdl index 97802cd76..9caeae45e 100644 --- a/nczarr_test/ref_quotes.cdl +++ b/nczarr_test/ref_quotes.cdl @@ -5,7 +5,7 @@ dimensions: lon = 30 ; variables: float fractional_snow_cover(time, lat, lon) ; - fractional_snow_cover:ID = 68b ; + fractional_snow_cover:ID = 68 ; 
fractional_snow_cover:esa_cci_path = NaN ; fractional_snow_cover:long_name = "Surface Fraction Covered by Snow" ; fractional_snow_cover:orig_attrs = "{\'comment\': \'Grid cell fractional snow cover based on the Globsnow CCI product.\', \'long_name\': \'Surface fraction covered by snow.\', \'project_name\': \'GlobSnow\', \'references\': \'Luojus, Kari, et al. \"ESA DUE Globsnow-Global Snow Database for Climate Research.\" ESA Special Publication. Vol. 686. 2010.\', \'source_name\': \'MFSC\', \'standard_name\': \'surface_snow_area_fraction\', \'units\': \'percent\', \'url\': \'http://www.globsnow.info/\'}" ; diff --git a/nczarr_test/ref_string.cdl b/nczarr_test/ref_string.cdl new file mode 100644 index 000000000..f8895ff2a --- /dev/null +++ b/nczarr_test/ref_string.cdl @@ -0,0 +1,17 @@ +netcdf ref_string { +dimensions: + d = 2 ; +variables: + char c(d); + string v(d) ; + +// global attributes: + string :stringattr = "abc", "def" ; + :charattr = "ghi", "jkl" ; + :_nczarr_default_maxstrlen = 6 ; +data: + + c = "a", "b" ; + + v = "uvw", "xyz" ; +} diff --git a/nczarr_test/ref_zarr_test_data.cdl.gz b/nczarr_test/ref_zarr_test_data.cdl.gz index 9d688eb0e..85ab24c24 100644 Binary files a/nczarr_test/ref_zarr_test_data.cdl.gz and b/nczarr_test/ref_zarr_test_data.cdl.gz differ diff --git a/nczarr_test/run_jsonconvention.sh b/nczarr_test/run_jsonconvention.sh index 7cf786a68..6521332e5 100755 --- a/nczarr_test/run_jsonconvention.sh +++ b/nczarr_test/run_jsonconvention.sh @@ -17,11 +17,14 @@ echo "*** Test: write then read using json convention" fileargs tmp_jsonconvention "mode=nczarr,$zext" deletemap $zext $file ${NCGEN} -4 -b -o "$fileurl" $srcdir/ref_jsonconvention.cdl +# Clean up extraneous changes wrt _NCProperties +${ZMD} -h $fileurl | sed -e 's/,key1=value1|key2=value2//' -e '/"_NCProperties"/ s/(378)/(354)/' > tmp_jsonconvention_${zext}.txt ${NCDUMP} $fileurl > tmp_jsonconvention_${zext}.cdl # remove '\n' from ref file before comparing rm -f tmp_jsonconvention.cdl sed 
-e 's|\\n||g' < ${srcdir}/ref_jsonconvention.cdl > tmp_jsonconvention.cdl diff -b tmp_jsonconvention.cdl tmp_jsonconvention_${zext}.cdl +diff -b ${srcdir}/ref_jsonconvention.zmap tmp_jsonconvention_${zext}.txt } testcase file diff --git a/nczarr_test/run_newformat.sh b/nczarr_test/run_newformat.sh index dbb93e99f..d5bc2ce76 100755 --- a/nczarr_test/run_newformat.sh +++ b/nczarr_test/run_newformat.sh @@ -3,7 +3,6 @@ if test "x$srcdir" = x ; then srcdir=`pwd`; fi . ../test_common.sh -set -x . "$srcdir/test_nczarr.sh" set -e diff --git a/nczarr_test/run_strings.sh b/nczarr_test/run_strings.sh new file mode 100755 index 000000000..c0b13568a --- /dev/null +++ b/nczarr_test/run_strings.sh @@ -0,0 +1,88 @@ +#!/bin/sh + +if test "x$srcdir" = x ; then srcdir=`pwd`; fi +. ../test_common.sh + +. "$srcdir/test_nczarr.sh" + +# This shell script tests support for the NC_STRING type + +set -e + +# Cvt stringattr to single char string +stringfixsa() { +rm -f $2 +sed -e '/:stringattr/ s|string :|:|' -e '/:stringattr/ s|", "||g' < $1 > $2 +} + +# Cvt stringattr to JSON format string +stringfixjsa() { +rm -f $2 +sed -e '/:stringattr/ s|string :|:|' -e '/:stringattr/ s|"|\\"|g' -e '/:stringattr/ s|= \(.*\);|= "\[\1\]" ;|' < $1 > $2 +} + +# Cvt v var data to single char string +stringfixv() { +rm -f $2 +sed -e '/v = / s|", "||g' < $1 > $2 +} + +# Cvt charattr to single char string +stringfixca() { +rm -f $2 +sed -e '/:charattr/ s|", "||g' <$1 > $2 +} + +# Cvt c var data to single char string +stringfixc() { +rm -f $2 +sed -e '/c = / s|", "||g' < $1 > $2 +} + +testcase() { +zext=$1 + +echo "*** Test: nczarr string write then read; format=$zext" +# Get pure zarr args +fileargs tmp_string_zarr "mode=zarr,$zext" +zarrurl="$fileurl" +zarrfile="$file" +# Get nczarr args +fileargs tmp_string_nczarr "mode=nczarr,$zext" +nczarrurl="$fileurl" +nczarrfile="$file" + +# setup +deletemap $zext $zarrfile +deletemap $zext $nczarrfile + +# Create alternate ref files +echo "*** create pure zarr file" 
+${NCGEN} -4 -b -o "$zarrurl" $srcdir/ref_string.cdl +echo "*** create nczarr file" +${NCGEN} -4 -b -o "$nczarrurl" $srcdir/ref_string.cdl + +echo "*** read purezarr" +${NCDUMP} -n ref_string $zarrurl > tmp_string_zarr_${zext}.cdl +${ZMD} -h $zarrurl > tmp_string_zarr_${zext}.txt +echo "*** read nczarr" +${NCDUMP} -n ref_string $nczarrurl > tmp_string_nczarr_${zext}.cdl +${ZMD} -h $nczarrurl > tmp_string_nczarr_${zext}.txt + +echo "*** convert for nczarr comparison" +stringfixca ${srcdir}/ref_string.cdl tmp_ref_string_ca.cdl +stringfixc tmp_ref_string_ca.cdl tmp_ref_string_cac.cdl + +echo "*** convert for zarr comparison" +stringfixjsa tmp_ref_string_cac.cdl tmp_ref_string_cacsa.cdl + +echo "*** verify" +diff -bw tmp_ref_string_cac.cdl tmp_string_nczarr_${zext}.cdl +diff -bw tmp_ref_string_cacsa.cdl tmp_string_zarr_${zext}.cdl +} + +testcase file +if test "x$FEATURE_NCZARR_ZIP" = xyes ; then testcase zip; fi +if test "x$FEATURE_S3TESTS" = xyes ; then testcase s3; fi + +exit 0 diff --git a/nczarr_test/zmapio.c b/nczarr_test/zmapio.c index c9c982c88..3131793e3 100644 --- a/nczarr_test/zmapio.c +++ b/nczarr_test/zmapio.c @@ -55,18 +55,21 @@ static struct Type { const char* typename; nc_type nctype; int typesize; + const char format[16]; } types[] = { -{"ubyte",NC_UBYTE,1}, -{"byte",NC_BYTE,1}, -{"ushort",NC_USHORT,2}, -{"short",NC_SHORT,2}, -{"uint",NC_UINT,4}, -{"int",NC_INT,4}, -{"uint64",NC_UINT64,8}, -{"int64",NC_INT64,8}, -{"float",NC_FLOAT,4}, -{"double",NC_DOUBLE,8}, -{NULL,NC_NAT,0} +{"ubyte",NC_UBYTE,1,"%u"}, +{"byte",NC_BYTE,1,"%d"}, +{"ushort",NC_USHORT,2,"%u"}, +{"short",NC_SHORT,2,"%d"}, +{"uint",NC_UINT,4,"%u"}, +{"int",NC_INT,4,"%d"}, +{"uint64",NC_UINT64,8,"%llu"}, +{"int64",NC_INT64,8,"%lld"}, +{"float",NC_FLOAT,4,"%f"}, +{"double",NC_DOUBLE,8,"%lf"}, +{"char",NC_CHAR,1,"'%c'"}, +{"string",NC_STRING,sizeof(char*),"%.*s"}, +{NULL,NC_NAT,0,""} }; /* Command line options */ @@ -78,8 +81,10 @@ struct Dumpptions { NCZM_IMPL impl; char* rootpath; const 
struct Type* nctype; + char format[16]; int xflags; # define XNOZMETADATA 1 + int strlen; } dumpoptions; /* Forward */ @@ -120,9 +125,15 @@ decodeop(const char* name) } static const struct Type* -decodetype(const char* name) +decodetype(const char* name, int* strlenp) { struct Type* p = types; + + if(strncmp(name,"string/",strlen("string/"))==0) { + *strlenp = atoi(name+strlen("string/")); + name = "string"; + } + for(;p->typename != NULL;p++) { if(strcasecmp(p->typename,name)==0) return p; } @@ -138,9 +149,10 @@ main(int argc, char** argv) nc_initialize(); + /* Init options */ memset((void*)&dumpoptions,0,sizeof(dumpoptions)); - while ((c = getopt(argc, argv, "dhvx:t:T:X:")) != EOF) { + while ((c = getopt(argc, argv, "dhvx:t:F:T:X:")) != EOF) { switch(c) { case 'd': dumpoptions.debug = 1; @@ -148,17 +160,20 @@ main(int argc, char** argv) case 'h': dumpoptions.meta_only = 1; break; - case 'v': - zmapusage(); - goto done; case 't': - dumpoptions.nctype = decodetype(optarg); + dumpoptions.nctype = decodetype(optarg,&dumpoptions.strlen); if(dumpoptions.nctype == NULL) zmapusage(); break; case 'x': dumpoptions.mop = decodeop(optarg); if(dumpoptions.mop == MOP_NONE) zmapusage(); break; + case 'v': + zmapusage(); + goto done; + case 'F': + snprintf(dumpoptions.format,sizeof(dumpoptions.format),"%s",optarg); + break; case 'T': nctracelevel(atoi(optarg)); break; @@ -344,7 +359,7 @@ objdump(void) len = ceildiv(len,dumpoptions.nctype->typesize); } printf("[%d] %s : (%llu)",depth,obj,len); - if(kind == OK_CHUNK) + if(kind == OK_CHUNK && dumpoptions.nctype->nctype != NC_STRING) printf(" (%s)",dumpoptions.nctype->typename); printf(" |"); switch(kind) { @@ -434,25 +449,40 @@ static char hex[16] = "0123456789abcdef"; static void printcontent(size64_t len, const char* content, OBJKIND kind) { - size64_t i; + size64_t i, count; unsigned int c0,c1; - for(i=0;i<len;i++) { + const char* format = NULL; + int strlen = 1; + + format = dumpoptions.nctype->format; + if(dumpoptions.format[0] != '\0') + format = 
dumpoptions.format; + + if(dumpoptions.strlen > 0) { + strlen = dumpoptions.strlen; + count = ((len+strlen)-1)/strlen; + } else + count = len; + + for(i=0;i<count;i++) { /* If kind is chunk, then len is # of values, not # of bytes */ switch(kind) { case OK_CHUNK: if(i > 0) printf(", "); switch(dumpoptions.nctype->nctype) { - case NC_BYTE: printf("%d",((char*)content)[i]); break; - case NC_SHORT: printf("%d",((short*)content)[i]); break; - case NC_INT: printf("%d",((int*)content)[i]); break; - case NC_INT64: printf("%lld",((long long*)content)[i]); break; - case NC_UBYTE: printf("%u",((unsigned char*)content)[i]); break; - case NC_USHORT: printf("%u",((unsigned short*)content)[i]); break; - case NC_UINT: printf("%u",((unsigned int*)content)[i]); break; - case NC_UINT64: printf("%llu",((unsigned long long*)content)[i]); break; - case NC_FLOAT: printf("%f",((float*)content)[i]); break; - case NC_DOUBLE: printf("%lf",((double*)content)[i]); break; + case NC_BYTE: printf(format,((char*)content)[i]); break; + case NC_SHORT: printf(format,((short*)content)[i]); break; + case NC_INT: printf(format,((int*)content)[i]); break; + case NC_INT64: printf(format,((long long*)content)[i]); break; + case NC_UBYTE: printf(format,((unsigned char*)content)[i]); break; + case NC_USHORT: printf(format,((unsigned short*)content)[i]); break; + case NC_UINT: printf(format,((unsigned int*)content)[i]); break; + case NC_UINT64: printf(format,((unsigned long long*)content)[i]); break; + case NC_FLOAT: printf(format,((float*)content)[i]); break; + case NC_DOUBLE: printf(format,((double*)content)[i]); break; + case NC_CHAR: printf(format,((char*)content)[i]); break; + case NC_STRING: printf(format,(int)strlen,((char*)(&content[i*strlen]))); break; default: abort(); } break; diff --git a/test_common.in b/test_common.in index dc5be62e0..1df44654b 100644 --- a/test_common.in +++ b/test_common.in @@ -5,6 +5,10 @@ # See netcdf-c/COPYRIGHT file for more info. 
+# Load only once +if test "x$TEST_COMMON_SH" = x ; then +export TEST_COMMON_SH=1 + # Define location of execution TOPSRCDIR='@abs_top_srcdir@' TOPBUILDDIR='@abs_top_builddir@' @@ -32,6 +36,9 @@ FEATURE_NCZARR_ZIP=@DO_NCZARR_ZIP_TESTS@ FEATURE_FILTERTESTS=@DO_FILTER_TESTS@ FEATURE_PLUGIN_INSTALL_DIR=@PLUGIN_INSTALL_DIR@ +# Thredds-test server is currently disabled +#FEATURE_THREDDSTEST=1 + set -e # Figure out various locations in the src/build tree. @@ -170,5 +177,10 @@ avail() { if test yes = `${execdir}/../ncdump/ncfilteravail $1` ; then return 0 ; else echo "filter $1 not available" ; return 1; fi } +# Thredds-test is no longer available +#FEATURE_THREDDSTEST=1 + # Make sure we are in builddir (not execdir) cd $builddir + +fi #TEST_COMMON_SH