Merge pull request #2655 from ZedThree/hdf5-transient-types

Add support for HDF5 transient types
This commit is contained in:
Ward Fisher 2023-07-18 16:49:33 -06:00 committed by GitHub
commit dc2b0f7608
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 153 additions and 9 deletions

View File

@ -13,6 +13,7 @@
#include "hdf5internal.h"
#include "hdf5err.h"
#include "hdf5debug.h"
#include "nc4internal.h"
#include "ncrc.h"
#include "ncauth.h"
#include "ncmodel.h"
@ -63,6 +64,7 @@ extern int NC4_open_image_file(NC_FILE_INFO_T* h5);
/* Defined later in this file. */
static int rec_read_metadata(NC_GRP_INFO_T *grp);
static int read_type(NC_GRP_INFO_T *grp, hid_t hdf_typeid, char *type_name);
/**
* @internal Struct to track HDF5 object info, for
@ -103,7 +105,7 @@ typedef struct {
* struct, either an existing one (for user-defined types) or a newly
* created one.
*
* @param h5 Pointer to HDF5 file info struct.
* @param h5_grp Pointer to group info struct.
* @param datasetid HDF5 dataset ID.
* @param type_info Pointer to pointer that gets type info struct.
*
@ -114,7 +116,7 @@ typedef struct {
* @author Ed Hartnett
*/
static int
get_type_info2(NC_FILE_INFO_T *h5, hid_t datasetid, NC_TYPE_INFO_T **type_info)
get_type_info2(NC_GRP_INFO_T *h5_grp, hid_t datasetid, NC_TYPE_INFO_T **type_info)
{
NC_HDF5_TYPE_INFO_T *hdf5_type;
htri_t is_str, equal = 0;
@ -123,7 +125,7 @@ get_type_info2(NC_FILE_INFO_T *h5, hid_t datasetid, NC_TYPE_INFO_T **type_info)
H5T_order_t order;
int t;
assert(h5 && type_info);
assert(h5_grp && type_info);
/* Because these H5T_NATIVE_* constants are actually function calls
* (!) in H5Tpublic.h, I can't initialize this array in the usual
@ -232,11 +234,24 @@ get_type_info2(NC_FILE_INFO_T *h5, hid_t datasetid, NC_TYPE_INFO_T **type_info)
else
{
NC_TYPE_INFO_T *type;
NC_FILE_INFO_T *h5 = h5_grp->nc4_info;
/* This is a user-defined type. */
if((type = nc4_rec_find_hdf_type(h5, native_typeid)))
*type_info = type;
/* If we didn't find the type, then it's probably a transient
* type, stored in the dataset itself, so let's read it now */
if (type == NULL) {
/* If we still can't read the type, ignore it, it probably
* means this object is a reference */
if (read_type(h5_grp, native_typeid, ""))
return NC_EBADTYPID;
if((type = nc4_rec_find_hdf_type(h5, native_typeid)))
*type_info = type;
}
/* The type entry in the array of user-defined types already has
* an open data typeid (and native typeid), so close the ones we
* opened above. */
@ -1589,7 +1604,7 @@ read_var(NC_GRP_INFO_T *grp, hid_t datasetid, const char *obj_name,
/* Learn all about the type of this variable. This will fail for
* HDF5 reference types, and then the var we just created will be
* deleted, thus ignoring HDF5 reference type objects. */
if ((retval = get_type_info2(var->container->nc4_info, hdf5_var->hdf_datasetid,
if ((retval = get_type_info2(var->container, hdf5_var->hdf_datasetid,
&var->type_info)))
BAIL(retval);
@ -1989,6 +2004,14 @@ read_type(NC_GRP_INFO_T *grp, hid_t hdf_typeid, char *type_name)
LOG((4, "%s: type_name %s grp->hdr.name %s", __func__, type_name,
grp->hdr.name));
/* What is the class of this type, compound, vlen, etc. */
if ((class = H5Tget_class(hdf_typeid)) < 0)
return NC_EHDFERR;
/* Explicitly don't handle reference types */
if (class == H5T_REFERENCE)
return NC_EBADCLASS;
/* What is the native type for this platform? */
if ((native_typeid = H5Tget_native_type(hdf_typeid, H5T_DIR_DEFAULT)) < 0)
return NC_EHDFERR;
@ -2018,9 +2041,6 @@ read_type(NC_GRP_INFO_T *grp, hid_t hdf_typeid, char *type_name)
if (H5Iinc_ref(hdf5_type->hdf_typeid) < 0)
return NC_EHDFERR;
/* What is the class of this type, compound, vlen, etc. */
if ((class = H5Tget_class(hdf_typeid)) < 0)
return NC_EHDFERR;
switch (class)
{
case H5T_STRING:

View File

@ -23,7 +23,7 @@ SET(NC4_TESTS tst_dims tst_dims2 tst_dims3 tst_files tst_files4
tst_rename2 tst_rename3 tst_h5_endians tst_atts_string_rewrite tst_put_vars_two_unlim_dim
tst_hdf5_file_compat tst_fill_attr_vanish tst_rehash tst_types tst_bug324
tst_atts3 tst_put_vars tst_elatefill tst_udf tst_bug1442 tst_broken_files
tst_quantize)
tst_quantize tst_h_transient_types)
IF(HAS_PAR_FILTERS)
SET(NC4_tests $NC4_TESTS tst_alignment)

View File

@ -35,7 +35,7 @@ tst_h_scalar tst_rename tst_rename2 tst_rename3 tst_h5_endians \
tst_atts_string_rewrite tst_hdf5_file_compat tst_fill_attr_vanish \
tst_rehash tst_filterparser tst_bug324 tst_types tst_atts3 \
tst_put_vars tst_elatefill tst_udf tst_put_vars_two_unlim_dim \
tst_bug1442 tst_quantize
tst_bug1442 tst_quantize tst_h_transient_types
if HAS_PAR_FILTERS
NC4_TESTS += tst_alignment

View File

@ -0,0 +1,124 @@
/* This is part of the netCDF package. Copyright 2018 University
Corporation for Atmospheric Research/Unidata. See COPYRIGHT file for
conditions of use.
This program tests fixes for reading netCDF-4 files that contain
transient/unnamed datatypes embedded directly within datasets.
*/
#include "netcdf.h"
#include <config.h>
#include <nc_tests.h>
#include <err_macros.h>
#include <hdf5.h>
#include <stdbool.h>
#define FILE_NAME "tst_h_transient.h5"
#define VAR_NAME "var"
#define ENUM_NAME "bool_var"
/* Plain two-double record standing in for a complex number. The C99
 * standard `complex` type is deliberately avoided because MSVC has a
 * non-compliant implementation of it. */
typedef struct {
    double r; /* real component */
    double i; /* imaginary component */
} complex;
/**
 * Test driver: write an HDF5 file whose datasets use transient
 * (uncommitted) datatypes, then read it back through the netCDF-4 API.
 *
 * Phase 1 uses the HDF5 API directly to create FILE_NAME with two scalar
 * datasets: VAR_NAME, typed with a compound {r, i} of doubles, and
 * ENUM_NAME, typed with an enum holding "FALSE" and "TRUE". Neither
 * datatype is committed to the file.
 *
 * Phase 2 opens the file with nc_open(), checks that exactly two
 * user-defined types are reported, and checks that both variables read
 * back with the values that were written.
 *
 * ERR, SUMMARIZE_ERR and FINAL_RESULTS are supplied by the test headers
 * included by this file; their exact exit-status behaviour is not
 * visible here.
 */
int
main(void)
{
    complex expected_z = {1, 2};
    bool expected_b = true;

    printf("\n*** Creating file with datasets that have transient datatypes.\n");
    {
        hid_t fileid, scalar_spaceid;
        hid_t dsetid, complex_dtype;
        hid_t enum_dtype, enum_dsetid;
        bool enum_value;

        /* Create new file, using default properties. */
        if ((fileid = H5Fcreate(FILE_NAME, H5F_ACC_TRUNC, H5P_DEFAULT, H5P_DEFAULT)) < 0) ERR;

        /* Create compound datatype, but don't commit to file */
        if ((complex_dtype = H5Tcreate(H5T_COMPOUND, sizeof(complex))) < 0) ERR;
        if (H5Tinsert(complex_dtype, "r", 0, H5T_NATIVE_DOUBLE) < 0) ERR;
        if (H5Tinsert(complex_dtype, "i", sizeof(double), H5T_NATIVE_DOUBLE) < 0) ERR;

        /* Create dataset with transient datatype */
        if ((scalar_spaceid = H5Screate(H5S_SCALAR)) < 0) ERR;
        if ((dsetid = H5Dcreate2(fileid, VAR_NAME, complex_dtype, scalar_spaceid, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT)) < 0) ERR;

        /* Write complex number to file */
        if ((H5Dwrite(dsetid, complex_dtype, H5S_ALL, H5S_ALL, H5P_DEFAULT, &expected_z)) < 0) ERR;

        /* Create enum datatype, but don't commit to file */
        if ((enum_dtype = H5Tenum_create(H5T_NATIVE_CHAR)) < 0) ERR;
        enum_value = false;
        if (H5Tenum_insert(enum_dtype, "FALSE", &enum_value) < 0) ERR;
        enum_value = true;
        if (H5Tenum_insert(enum_dtype, "TRUE", &enum_value) < 0) ERR;

        /* Create dataset with transient enum datatype */
        if ((enum_dsetid = H5Dcreate2(fileid, ENUM_NAME, enum_dtype, scalar_spaceid, H5P_DEFAULT, H5P_DEFAULT, H5P_DEFAULT)) < 0) ERR;

        /* Write enum to file */
        if ((H5Dwrite(enum_dsetid, enum_dtype, H5S_ALL, H5S_ALL, H5P_DEFAULT, &expected_b)) < 0) ERR;

        /* Close everything */
        if (H5Dclose(dsetid) < 0) ERR;
        if (H5Dclose(enum_dsetid) < 0) ERR;
        if (H5Sclose(scalar_spaceid) < 0) ERR;
        if (H5Tclose(complex_dtype) < 0) ERR;
        if (H5Tclose(enum_dtype) < 0) ERR;
        if (H5Fclose(fileid) < 0) ERR;
    }

    printf("*** Checking accessing file through netCDF-4 API...");
    {
        int ncid, varid, enumid;
        complex read_z;
        int num_types, class;
        int *typeids;
        nc_type base_nc_type;
        char name[NC_MAX_NAME];
        size_t size, nfields;
        bool read_b;

        /* NOTE(review): verbose logging is switched on here; confirm this
         * is intended rather than a debugging leftover. */
        nc_set_log_level(4);
        if (nc_open(FILE_NAME, NC_NOWRITE, &ncid)) ERR;

        /* Read known types */
        if (nc_inq_typeids(ncid, &num_types, NULL)) ERR;

        /* Verify there are two known: complex and bool enum */
        if (num_types != 2) ERR;

        /* Fetch the type ids. The allocation must be checked because
         * typeids[0] is dereferenced just below. */
        typeids = malloc((size_t)num_types * sizeof *typeids);
        if (typeids == NULL) ERR;
        if (nc_inq_typeids(ncid, NULL, typeids)) ERR;
        if (nc_inq_user_type(ncid, typeids[0], name, &size, &base_nc_type, &nfields, &class)) ERR;
        free(typeids);

        /* Verify that the dataset is present */
        if (nc_inq_varid(ncid, VAR_NAME, &varid)) ERR;

        /* Read complex variable */
        if (nc_get_var(ncid, varid, &read_z)) ERR;
        if (read_z.r != expected_z.r) ERR;
        if (read_z.i != expected_z.i) ERR;

        /* Verify that the enum dataset is present */
        if (nc_inq_varid(ncid, ENUM_NAME, &enumid)) ERR;

        /* Read and check enum variable */
        if (nc_get_var(ncid, enumid, &read_b)) ERR;
        if (read_b != expected_b) ERR;

        if (nc_close(ncid)) ERR;
    }
    SUMMARIZE_ERR;
    FINAL_RESULTS;
}