From 2b45c7ec841bfa3ea44d19966346bf5cb2b6375b Mon Sep 17 00:00:00 2001 From: Dennis Heimbigner Date: Fri, 29 Jul 2022 14:47:07 -0600 Subject: [PATCH] Fix support for reading arrays of HDF5 fixed size strings re: https://github.com/Unidata/netcdf-c/issues/2159 There was error in libhdf5 that only allowed reading a single value HDF5 fixed string. Fix to allow reading an array of such strings. Also make sure it still works for scalars and for attributes. Add a testcase: nc_test4/tst_fixedstring.sh. --- RELEASE_NOTES.md | 3 +- libhdf5/hdf5var.c | 39 ++++++++++++++--- libsrc4/nc4internal.c | 6 +-- nc_test4/CMakeLists.txt | 1 + nc_test4/Makefile.am | 18 +++++++- nc_test4/build_fixedstring.c | 81 +++++++++++++++++++++++++++++++++++ nc_test4/ref_fixedstring.cdl | 9 ++++ nc_test4/ref_fixedstring.h5 | Bin 0 -> 2057 bytes nc_test4/tst_fixedstring.sh | 15 +++++++ nc_test4/tst_vars3.c | 24 +++++------ 10 files changed, 172 insertions(+), 24 deletions(-) create mode 100644 nc_test4/build_fixedstring.c create mode 100644 nc_test4/ref_fixedstring.cdl create mode 100755 nc_test4/ref_fixedstring.h5 create mode 100755 nc_test4/tst_fixedstring.sh diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index 4c263665c..cdac66ead 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -7,8 +7,9 @@ This file contains a high-level description of this package's evolution. Release ## 4.9.1 - T.B.D. +* [Bug Fix] Fix support for reading arrays of HDF5 fixed size strings. See [Github #????](https://github.com/Unidata/netcdf-c/pull/????). * [Bug Fix] Provide a default enum const when fill value does not match any enum constant for the value zero. See [Github #2462](https://github.com/Unidata/netcdf-c/pull/2462). -* [Bug Fi] Fix the json submodule symbol conflicts between libnetcdf and the plugin specific netcdf_json.h. See [Github #2448](https://github.com/Unidata/netcdf-c/pull/2448). +* [Bug Fix] Fix the json submodule symbol conflicts between libnetcdf and the plugin specific netcdf_json.h. See [Github #2448](https://github.com/Unidata/netcdf-c/pull/2448). * [Bug Fix] Fix quantize with CLASSIC_MODEL files. See [Github #2405](https://github.com/Unidata/netcdf-c/pull/2445). * [Enhancement] Add `--disable-quantize` option to `configure`. * [Bug Fix] Fix CMakeLists.txt to handle all acceptable boolean values for -DPLUGIN_INSTALL_DIR. See [Github #2430](https://github.com/Unidata/netcdf-c/pull/2430). diff --git a/libhdf5/hdf5var.c b/libhdf5/hdf5var.c index 0dfd9ef80..3a8c07bd2 100644 --- a/libhdf5/hdf5var.c +++ b/libhdf5/hdf5var.c @@ -1880,6 +1880,9 @@ NC4_get_vars(int ncid, int varid, const size_t *startp, const size_t *countp, void *bufr = NULL; int need_to_convert = 0; size_t len = 1; + int fixedlengthstring = 0; + hsize_t fstring_len = 0; + size_t fstring_count = 1; /* Find info for this file, group, and var. */ if ((retval = nc4_hdf5_find_grp_h5_var(ncid, varid, &h5, &grp, &var))) @@ -2068,14 +2071,21 @@ NC4_get_vars(int ncid, int varid, const size_t *startp, const size_t *countp, H5Tget_size(hdf5_type->hdf_typeid) > 1 && !H5Tis_variable_str(hdf5_type->hdf_typeid)) { - hsize_t fstring_len; + size_t k; if ((fstring_len = H5Tget_size(hdf5_type->hdf_typeid)) == 0) BAIL(NC_EHDFERR); - if (!(*(char **)data = malloc(1 + fstring_len))) - BAIL(NC_ENOMEM); - bufr = *(char **)data; - } + /* Compute the total number of strings to read */ + if (var->ndims) { + for (k = 0; k < var->ndims; k++) { + fstring_count *= countp[k]; + } + } + /* Allocate space for the all the strings */ + if (!(bufr = malloc(fstring_len*fstring_count))) + BAIL(NC_ENOMEM); + fixedlengthstring = 1; + } /* Create the data transfer property list. */ if ((xfer_plistid = H5Pcreate(H5P_DATASET_XFER)) < 0) @@ -2128,6 +2138,24 @@ NC4_get_vars(int ncid, int varid, const size_t *startp, const size_t *countp, } #endif /* USE_PARALLEL4 */ } + + /* If we read a sequence of fixed length strings, then we need to convert to char* in memory */ + if(fixedlengthstring) { + size_t k; + char** strvec = (char**)data; + char* p; + for(k=0;k < fstring_count;k++) { + char* eol; + if((p = (char*)malloc(1+fstring_len))==NULL) BAIL(NC_ENOMEM); + memcpy(p,((char*)bufr)+(k*fstring_len),fstring_len); + eol = p + fstring_len; + *eol = '\0'; + strvec[k] = p; p = NULL; + } + free(bufr); + bufr = NULL; + } + /* Now we need to fake up any further data that was asked for, using the fill values instead. First skip past the data we just read, if any. */ @@ -2207,6 +2235,7 @@ NC4_get_vars(int ncid, int varid, const size_t *startp, const size_t *countp, } exit: + if(fixedlengthstring && bufr) free(bufr); if (file_spaceid > 0) if (H5Sclose(file_spaceid) < 0) BAIL2(NC_EHDFERR); diff --git a/libsrc4/nc4internal.c b/libsrc4/nc4internal.c index b96291dc0..a4ead8f80 100644 --- a/libsrc4/nc4internal.c +++ b/libsrc4/nc4internal.c @@ -2106,11 +2106,11 @@ NC_createglobalstate(void) } /* Initialize struct pointers */ if((nc_globalstate->rcinfo = calloc(1,sizeof(struct NCRCinfo)))==NULL) - {stat = NC_ENOMEM; goto done;} + {stat = NC_ENOMEM; goto done;} if((nc_globalstate->rcinfo->entries = nclistnew())==NULL) - {stat = NC_ENOMEM; goto done;} + {stat = NC_ENOMEM; goto done;} if((nc_globalstate->rcinfo->s3profiles = nclistnew())==NULL) - {stat = NC_ENOMEM; goto done;} + {stat = NC_ENOMEM; goto done;} /* Get environment variables */ if(getenv(NCRCENVIGNORE) != NULL) diff --git a/nc_test4/CMakeLists.txt b/nc_test4/CMakeLists.txt index df4980265..f1fcde281 100644 --- a/nc_test4/CMakeLists.txt +++ b/nc_test4/CMakeLists.txt @@ -40,6 +40,7 @@ IF(BUILD_UTILITIES) ADD_SH_TEST(nc_test4 tst_misc) build_bin_test(tst_fillonly) ADD_SH_TEST(nc_test4 test_fillonly) + ADD_SH_TEST(nc_test4 tst_fixedstring) IF(USE_HDF5 AND ENABLE_FILTER_TESTING) build_bin_test(tst_filterparser) build_bin_test(test_filter) diff --git a/nc_test4/Makefile.am b/nc_test4/Makefile.am index c21c51d1f..fb5b24872 100644 --- a/nc_test4/Makefile.am +++ b/nc_test4/Makefile.am @@ -70,6 +70,9 @@ TESTS += run_grp_rename.sh tst_misc.sh check_PROGRAMS += tst_fillonly TESTS += test_fillonly.sh +# H5 and nczarr Fixed string support +TESTS += tst_fixedstring.sh + # Szip Tests (requires ncdump) if HAVE_H5Z_SZIP check_PROGRAMS += test_szip h5testszip @@ -112,7 +115,8 @@ ref_filter_repeat.txt ref_fillonly.cdl test_fillonly.sh \ ref_filter_order_create.txt ref_filter_order_read.txt \ ref_any.cdl tst_specific_filters.sh tst_unknown.sh \ tst_virtual_datasets.c noop1.cdl unknown.cdl \ -tst_broken_files.c +tst_broken_files.c \ +tst_fixedstring.sh ref_fixedstring.h5 ref_fixedstring.cdl # The tst_filterinstall test can only be run after an install # occurred with --with-plugin-dir enabled. So there is no point @@ -131,9 +135,19 @@ floats*.nc floats*.cdl shorts*.nc shorts*.cdl ints*.nc ints*.cdl \ testfilter_reg.nc filterrepeat.txt tmp_fillonly.nc \ testfilter_order.nc crfilterorder.txt rdfilterorder.txt 1 \ tmp_*.txt tmp_*.nc tmp*.dump tmp*.cdl tmp*.txt tmp*.tmp \ -tmp_bzip2.c bzip2.nc noop.nc tmp_*.dmp +tmp_bzip2.c bzip2.nc noop.nc tmp_*.dmp tmp_*.cdl DISTCLEANFILES = findplugin.sh run_par_test.sh # If valgrind is present, add valgrind targets. @VALGRIND_CHECK_RULES@ + +# The (otherwise unused) program build_fixedstring.c +# is used to generate the test file ref_fixedstring.h5. +# That test file is build and included as part of the distribution, +# so the build_fixedstring.c program generally does not need to +# be executed unless the test file needs to be modified.. + +check_PROGRAMS += build_fixedstring +ref_fixedstring.h5: build_fixedstring.c + ${srcdir}/buildfixedstring diff --git a/nc_test4/build_fixedstring.c b/nc_test4/build_fixedstring.c new file mode 100644 index 000000000..a237a4a8a --- /dev/null +++ b/nc_test4/build_fixedstring.c @@ -0,0 +1,81 @@ +/* This is part of the netCDF package. Copyright 2018 University + Corporation for Atmospheric Research/Unidata See COPYRIGHT file for + conditions of use. + + This program does HDF5 string stuff. + + Here's a HDF5 sample programs: + http://hdf.ncsa.uiuc.edu/training/other-ex5/sample-programs/strings.c +*/ +#include +#include "err_macros.h" +#include + +#define FILE_NAME "ref_fixedstring.h5" +#define DIM1_LEN 4 +#define ATT_NAME1 "att1" +#define ATT_NAMEN "attn" +#define GRP_NAME "group" + +int +main() +{ + hid_t fileid, typeid; + hid_t spaceid1, attid1; + hid_t spaceidn, attidn; + hid_t dataspaceid1, dataset1; + hid_t dataspaceidn, datasetn; + size_t type_size; + + const char data1[4] = "abcd"; + const char datan[16] = "abcdefghijklmnop"; + const hsize_t dims[1] = {4}; + + printf("\n*** Checking HDF5 fixed length string types.\n"); + + printf("*** Create HDF5 Dataset ..."); + + /* Open file. */ + if ((fileid = H5Fcreate(FILE_NAME, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT)) < 0) ERR; + + /* Create fixed string type. */ + if ((typeid = H5Tcopy(H5T_C_S1)) < 0) ERR; + type_size = 4 * sizeof(char); + if (H5Tset_size (typeid, type_size) < 0) ERR; + if (H5Tset_strpad (typeid, H5T_STR_NULLPAD) < 0) ERR; + + /* Write a scalar attribute of this type. */ + if ((spaceid1 = H5Screate(H5S_SCALAR)) < 0) ERR; + if ((attid1 = H5Acreate1(fileid, ATT_NAME1, typeid, spaceid1, H5P_DEFAULT)) < 0) ERR; + if (H5Awrite(attid1, typeid, &data1) < 0) ERR; + + /* Write a vector attribute of this type. */ + if ((spaceidn = H5Screate_simple(1,dims,NULL)) < 0) ERR; + if ((attidn = H5Acreate1(fileid, ATT_NAMEN, typeid, spaceidn, H5P_DEFAULT)) < 0) ERR; + if (H5Awrite(attidn, typeid, &datan) < 0) ERR; + + /* Write a scalar variable of this type. */ + if ((dataspaceid1 = H5Screate(H5S_SCALAR)) < 0) ERR; + dataset1 = H5Dcreate1(fileid, "v1", typeid, dataspaceid1, H5P_DEFAULT); + if (H5Dwrite (dataset1, typeid, H5S_ALL, H5S_ALL, H5P_DEFAULT, data1) < 0) ERR; + + /* Write a vector variable of this type. */ + if ((dataspaceidn = H5Screate_simple(1,dims,NULL)) < 0) ERR; + datasetn = H5Dcreate1(fileid, "vn", typeid, dataspaceidn, H5P_DEFAULT); + if (H5Dwrite (datasetn, typeid, H5S_ALL, H5S_ALL, H5P_DEFAULT, datan) < 0) ERR; + + /* Close everything up. */ + if (H5Dclose(datasetn) < 0) ERR; + if (H5Sclose(dataspaceidn) < 0) ERR; + if (H5Dclose(dataset1) < 0) ERR; + if (H5Sclose(dataspaceid1) < 0) ERR; + if (H5Aclose(attidn) < 0) ERR; + if (H5Sclose(spaceidn) < 0) ERR; + if (H5Aclose(attid1) < 0) ERR; + if (H5Sclose(spaceid1) < 0) ERR; + if (H5Tclose(typeid) < 0) ERR; + if (H5Fclose(fileid) < 0) ERR; + + SUMMARIZE_ERR; + FINAL_RESULTS; +} diff --git a/nc_test4/ref_fixedstring.cdl b/nc_test4/ref_fixedstring.cdl new file mode 100644 index 000000000..09f6fdc34 --- /dev/null +++ b/nc_test4/ref_fixedstring.cdl @@ -0,0 +1,9 @@ +netcdf ref_fixedstring { +dimensions: + phony_dim_0 = 3 ; +variables: + string test(phony_dim_0) ; +data: + + test = "foo", "bar", "baz" ; +} diff --git a/nc_test4/ref_fixedstring.h5 b/nc_test4/ref_fixedstring.h5 new file mode 100755 index 0000000000000000000000000000000000000000..b2cc269362476134b5ebc2b70af90d0902b6adaf GIT binary patch literal 2057 zcmeD5aB<`1lHy_j0S*oZ76t(@6Gr@p0!|Ky2#gPtPk=HQp>zk7Ucm%mFfxE31A_!q zTo7tLy1I}cS62q0N|^aD8mf)KfCa*WIs+y=N{^5b@Njhu0C_b6>R(tYJpoN;uwW=j zEiM7EVd>EWCP606$iNCQ3u+)EG$k`KLIjwh<|1eguzF#jG)Rn<0m5fuVghU6fa+&v z1WJGbC)5NOH3P;NlytD4zY8cw!}6g5%;SU7wWE$24S~@Rz!w5(`T0qSMM;TO0MGX@ Axc~qF literal 0 HcmV?d00001 diff --git a/nc_test4/tst_fixedstring.sh b/nc_test4/tst_fixedstring.sh new file mode 100755 index 000000000..f27e456e9 --- /dev/null +++ b/nc_test4/tst_fixedstring.sh @@ -0,0 +1,15 @@ +#!/bin/sh + +if test "x$srcdir" = x ; then srcdir=`pwd`; fi +. ../test_common.sh + +set -e + +# Note, the test file for this is ref_fixedstring.h5 +# But is is generated by the (otherwise unused) program +# ../h5_test/tst_h_fixedstrings.c. + +echo "*** Test reading a file with HDF5 fixed length strings" +rm -f ./tmp_fixedstring.cdl +$NCDUMP ${srcdir}/ref_fixedstring.h5 > ./tmp_fixedstring.cdl +diff -b -w ${srcdir}/ref_fixedstring.cdl ./tmp_fixedstring.cdl diff --git a/nc_test4/tst_vars3.c b/nc_test4/tst_vars3.c index 9c4af0751..19d51de13 100644 --- a/nc_test4/tst_vars3.c +++ b/nc_test4/tst_vars3.c @@ -418,7 +418,7 @@ main(int argc, char **argv) if (nc_close(ncid)) ERR; } SUMMARIZE_ERR; -#ifdef HAVE_H5Z_SZIP +#ifdef HAVE_H5Z_SZIP printf("**** testing simple szip filter setup..."); { int ncid; @@ -727,15 +727,18 @@ main(int argc, char **argv) } /* next mask */ } SUMMARIZE_ERR; -#else +#else /*!HAVE_H5Z_SZIP*/ /* This code is run if szip is not present in HDF5. It checks that * nc_def_var_szip() returns NC_ENOFILTER in that case. */ + /* WARNING: This code no longer works if plugins is enabled. This is + because the plugin szip wrapper will be found, thus confusing HDF5. + */ +#ifdef HAVE_SZIP printf("**** testing szip handling when szip not built..."); { int ncid; int dimid; int varid; - unsigned int params[NUM_PARAMS_IN]; /* Create a netcdf-4 file with one dimensions. */ if (nc_create(FILE_NAME, NC_NETCDF4, &ncid)) ERR; @@ -744,19 +747,14 @@ main(int argc, char **argv) /* Add a var. Try to turn on szip filter, but it will return * error. */ if (nc_def_var(ncid, V_SMALL, NC_INT64, NDIMS1, &dimid, &varid)) ERR; - params[0] = NC_SZIP_NN; /* options_mask */ - params[1] = NC_SZIP_EC_BPP_IN; /* pixels_per_block */ if (nc_def_var_chunking(ncid, varid, NC_CHUNKED, NULL)) ERR; - { int stat; - stat = nc_def_var_filter(ncid, varid, H5_FILTER_SZIP, NUM_PARAMS_IN, params); - if(stat != NC_ENOFILTER) - ERR; - } - if (nc_def_var_szip(ncid, varid, NC_SZIP_NN, - NC_SZIP_EC_BPP_IN) != NC_ENOFILTER) ERR; + int stat; + if ((stat=nc_def_var_szip(ncid, varid, NC_SZIP_NN, NC_SZIP_EC_BPP_IN)) != NC_ENOFILTER) + ERR; if (nc_close(ncid)) ERR; } SUMMARIZE_ERR; -#endif /* HAVE_SZ */ +#endif /*HAVE_SZIP*/ +#endif /*!HAVE_H5Z_SZIP*/ FINAL_RESULTS; }