2018-12-07 05:13:56 +08:00
|
|
|
/* Copyright 2018-2018 University Corporation for Atmospheric
|
2018-07-12 21:05:21 +08:00
|
|
|
Research/Unidata. */
|
|
|
|
/**
|
|
|
|
* @file This header file contains macros, types, and prototypes for
|
|
|
|
* the HDF5 code in libhdf5. This header should not be included in
|
|
|
|
* code outside libhdf5.
|
|
|
|
*
|
|
|
|
* @author Ed Hartnett
|
2019-02-19 21:09:10 +08:00
|
|
|
*/
|
2018-07-12 21:05:21 +08:00
|
|
|
|
|
|
|
#ifndef _HDF5INTERNAL_
|
|
|
|
#define _HDF5INTERNAL_
|
|
|
|
|
|
|
|
#include "config.h"
|
2018-07-17 22:29:47 +08:00
|
|
|
#include <hdf5.h>
|
|
|
|
#include <hdf5_hl.h>
|
2018-07-16 18:50:15 +08:00
|
|
|
#include "nc4internal.h"
|
2018-07-12 21:05:21 +08:00
|
|
|
#include "ncdimscale.h"
|
2018-07-16 18:50:15 +08:00
|
|
|
#include "nc4dispatch.h"
|
2018-11-26 23:13:57 +08:00
|
|
|
#include "hdf5dispatch.h"
|
2019-03-22 01:33:27 +08:00
|
|
|
#include "netcdf_filter.h"
|
2018-07-12 21:05:21 +08:00
|
|
|
|
|
|
|
/* Name-length limit for HDF5 objects: NC_MAX_NAME plus 10 extra
 * characters. NOTE(review): the extra room is presumably for internal
 * name decorations -- confirm against the code that uses it. */
#define NC_MAX_HDF5_NAME (NC_MAX_NAME + 10)
|
|
|
|
|
2019-09-18 10:27:43 +08:00
|
|
|
/* These have to do with creating chunked datasets in HDF5. */
|
2018-07-12 21:05:21 +08:00
|
|
|
/* NOTE(review): value 0; presumably the dimension size used for an
 * unlimited dimension when creating chunked datasets -- confirm. */
#define NC_HDF5_UNLIMITED_DIMSIZE (0)
|
|
|
|
/* NOTE(review): value 10; presumably a scaling factor applied when
 * choosing chunk sizes -- confirm against the chunking code. */
#define NC_HDF5_CHUNKSIZE_FACTOR (10)
|
|
|
|
/* NOTE(review): value 2; presumably the smallest chunk size that will
 * be chosen -- confirm against the chunking code. */
#define NC_HDF5_MIN_CHUNK_SIZE (2)
|
|
|
|
|
|
|
|
/* String constant "NC_EMPTY_SCALE". NOTE(review): its use is not
 * visible in this header; the name suggests it marks a dimension scale
 * that carries no data -- confirm against its users. */
#define NC_EMPTY_SCALE "NC_EMPTY_SCALE"
|
|
|
|
|
|
|
|
/* Name of the attribute that was added to handle multidimensional
 * coordinate variables. */
|
|
|
|
#define COORDINATES "_Netcdf4Coordinates"
|
|
|
|
/* Five times NC_MAX_NAME. NOTE(review): presumably the buffer length
 * used for the COORDINATES attribute value -- confirm against users. */
#define COORDINATES_LEN (NC_MAX_NAME * 5)
|
|
|
|
|
|
|
|
/* String used when the user defines a non-coordinate variable with
 * the same name as a dimension. NOTE(review): per the macro name it is
 * presumably prepended to the variable's HDF5 name -- confirm. */
|
|
|
|
#define NON_COORD_PREPEND "_nc4_non_coord_"
|
|
|
|
|
|
|
|
/* Attribute name: an attribute of this name in the HDF5 root group
 * means that the file must follow strict netCDF classic format rules. */
|
|
|
|
#define NC3_STRICT_ATT_NAME "_nc3_strict"
|
|
|
|
|
|
|
|
/* Attribute name: if an attribute of this name is present on a
 * dimscale variable, its value is used as the netCDF dimid. */
|
|
|
|
#define NC_DIMID_ATT_NAME "_Netcdf4Dimid"
|
|
|
|
|
|
|
|
/** Name of the HDF5 dimension scale "CLASS" attribute. */
|
|
|
|
#define HDF5_DIMSCALE_CLASS_ATT_NAME "CLASS"
|
|
|
|
|
|
|
|
/** Name of the HDF5 dimension scale "NAME" attribute. */
|
|
|
|
#define HDF5_DIMSCALE_NAME_ATT_NAME "NAME"
|
|
|
|
|
2019-03-22 01:33:27 +08:00
|
|
|
/** Filter API operation codes. NOTE(review): presumably the values
 * passed as the `action` argument of nc4_global_filter_action() --
 * confirm against its definition. */
|
|
|
|
#define FILTER_REG 1 /* NOTE(review): presumably "register a filter" -- confirm. */
|
|
|
|
#define FILTER_UNREG 2 /* NOTE(review): presumably "unregister a filter" -- confirm. */
|
|
|
|
#define FILTER_INQ 3 /* NOTE(review): presumably "inquire about a filter" -- confirm. */
|
|
|
|
|
Provide byte-range reading of remote datasets
re: issue https://github.com/Unidata/netcdf-c/issues/1251
Assume that you have the URL to a remote dataset
which is a normal netcdf-3 or netcdf-4 file.
This PR allows the netcdf-c to read that dataset's
contents as a netcdf file using HTTP byte ranges
if the remote server supports byte-range access.
Originally, this PR was set up to access Amazon S3 objects,
but it can also access other remote datasets such as those
provided by a Thredds server via the HTTPServer access protocol.
It may also work for other kinds of servers.
Note that this is not intended as a true production
capability because, as is known, this kind of access
can be quite slow. In addition, the byte-range IO drivers
do not currently do any sort of optimization or caching.
An additional goal here is to gain some experience with
the Amazon S3 REST protocol.
This architecture and its use are documented in
the file docs/byterange.dox.
There are currently two test cases:
1. nc_test/tst_s3raw.c - this does a simple open, check format, close cycle
for a remote netcdf-3 file and a remote netcdf-4 file.
2. nc_test/test_s3raw.sh - this uses ncdump to investigate some remote
datasets.
This PR also incorporates significantly changed model inference code
(see the superseded PR https://github.com/Unidata/netcdf-c/pull/1259).
1. It centralizes the code that infers the dispatcher.
2. It adds support for byte-range URLs
Other changes:
1. NC_HDF5_finalize was not being properly called by nc_finalize().
2. Fix minor bug in ncgen3.l
3. fix memory leak in nc4info.c
4. add code to walk the .daprc triples and to replace protocol=
fragment tag with a more general mode= tag.
Final Note:
The inference code is still way too complicated. We need to move
to the validfile() model used by netcdf Java, where each
dispatcher is asked if it can process the file. This decentralizes
the inference code. This will be done after all the major new
dispatchers (PIO, Zarr, etc) have been implemented.
2019-01-02 09:27:36 +08:00
|
|
|
/** Struct to hold HDF5-specific info for the file. The `http` member
 * exists only when ENABLE_BYTERANGE is defined at compile time. */
|
|
|
|
typedef struct NC_HDF5_FILE_INFO {
|
2018-07-19 22:50:53 +08:00
|
|
|
hid_t hdfid; /* HDF5 identifier for the file. */
|
2019-02-25 07:54:13 +08:00
|
|
|
/* Byte-range (HTTP) access state; compiled in only with ENABLE_BYTERANGE. */
#ifdef ENABLE_BYTERANGE
|
Provide byte-range reading of remote datasets
re: issue https://github.com/Unidata/netcdf-c/issues/1251
Assume that you have the URL to a remote dataset
which is a normal netcdf-3 or netcdf-4 file.
This PR allows the netcdf-c to read that dataset's
contents as a netcdf file using HTTP byte ranges
if the remote server supports byte-range access.
Originally, this PR was set up to access Amazon S3 objects,
but it can also access other remote datasets such as those
provided by a Thredds server via the HTTPServer access protocol.
It may also work for other kinds of servers.
Note that this is not intended as a true production
capability because, as is known, this kind of access to
can be quite slow. In addition, the byte-range IO drivers
do not currently do any sort of optimization or caching.
An additional goal here is to gain some experience with
the Amazon S3 REST protocol.
This architecture and its use documented in
the file docs/byterange.dox.
There are currently two test cases:
1. nc_test/tst_s3raw.c - this does a simple open, check format, close cycle
for a remote netcdf-3 file and a remote netcdf-4 file.
2. nc_test/test_s3raw.sh - this uses ncdump to investigate some remote
datasets.
This PR also incorporates significantly changed model inference code
(see the superceded PR https://github.com/Unidata/netcdf-c/pull/1259).
1. It centralizes the code that infers the dispatcher.
2. It adds support for byte-range URLs
Other changes:
1. NC_HDF5_finalize was not being properly called by nc_finalize().
2. Fix minor bug in ncgen3.l
3. fix memory leak in nc4info.c
4. add code to walk the .daprc triples and to replace protocol=
fragment tag with a more general mode= tag.
Final Note:
Th inference code is still way too complicated. We need to move
to the validfile() model used by netcdf Java, where each
dispatcher is asked if it can process the file. This decentralizes
the inference code. This will be done after all the major new
dispatchers (PIO, Zarr, etc) have been implemented.
2019-01-02 09:27:36 +08:00
|
|
|
struct HTTP {
|
|
|
|
NCURI* uri; /* Parse of the incoming path, if url */
|
|
|
|
int iosp; /* We are using the S3 rawvirtual file driver */
|
|
|
|
} http;
|
|
|
|
#endif
|
2018-07-19 22:50:53 +08:00
|
|
|
} NC_HDF5_FILE_INFO_T;
|
|
|
|
|
2018-11-08 22:09:11 +08:00
|
|
|
/* Struct to hold HDF5-specific metadata for a dimension. */
|
|
|
|
typedef struct NC_HDF5_DIM_INFO
|
|
|
|
{
|
2019-02-19 21:09:10 +08:00
|
|
|
hid_t hdf_dimscaleid; /* Non-zero if a DIM_WITHOUT_VARIABLE dataset is in use (no coord var). */
|
|
|
|
HDF5_OBJID_T hdf5_objid; /* NOTE(review): presumably identifies the HDF5 object backing this dimension -- confirm; HDF5_OBJID_T is declared elsewhere. */
|
2018-11-08 22:09:11 +08:00
|
|
|
} NC_HDF5_DIM_INFO_T;
|
|
|
|
|
2018-11-08 02:33:02 +08:00
|
|
|
/** Struct to hold HDF5-specific info for attributes. */
|
|
|
|
typedef struct NC_HDF5_ATT_INFO
|
|
|
|
{
|
2019-02-19 21:09:10 +08:00
|
|
|
hid_t native_hdf_typeid; /* Native HDF5 datatype for attribute's data */
|
2018-11-08 02:33:02 +08:00
|
|
|
} NC_HDF5_ATT_INFO_T;
|
|
|
|
|
2018-11-12 22:40:15 +08:00
|
|
|
/* Struct to hold HDF5-specific info for a group. */
|
|
|
|
typedef struct NC_HDF5_GRP_INFO
|
|
|
|
{
|
2019-02-19 21:09:10 +08:00
|
|
|
hid_t hdf_grpid; /* HDF5 identifier for the group. */
|
2018-11-12 22:40:15 +08:00
|
|
|
} NC_HDF5_GRP_INFO_T;
|
|
|
|
|
2018-11-13 20:44:39 +08:00
|
|
|
/* Struct to hold HDF5-specific info for a variable. */
|
|
|
|
typedef struct NC_HDF5_VAR_INFO
|
|
|
|
{
|
2019-02-19 21:09:10 +08:00
|
|
|
hid_t hdf_datasetid; /* HDF5 dataset identifier for the variable. */
|
|
|
|
HDF5_OBJID_T *dimscale_hdf5_objids; /* NOTE(review): presumably an array with one entry per dimension, identifying the matching dimscale objects -- confirm. */
|
2020-03-30 02:48:59 +08:00
|
|
|
nc_bool_t dimscale; /**< True if var is a dimscale. */
|
|
|
|
nc_bool_t *dimscale_attached; /**< Array of flags that are true if dimscale is attached for that dim index. */
|
2018-11-13 20:44:39 +08:00
|
|
|
} NC_HDF5_VAR_INFO_T;
|
|
|
|
|
|
|
|
/* Struct to hold HDF5-specific info for a field. NOTE(review):
 * presumably a field of a compound type -- confirm. */
|
|
|
|
typedef struct NC_HDF5_FIELD_INFO
|
|
|
|
{
|
2019-02-19 21:09:10 +08:00
|
|
|
hid_t hdf_typeid; /* HDF5 type identifier for the field. */
|
|
|
|
hid_t native_hdf_typeid; /* Native HDF5 type identifier for the field. */
|
2018-11-13 20:44:39 +08:00
|
|
|
} NC_HDF5_FIELD_INFO_T;
|
|
|
|
|
2018-11-20 23:00:48 +08:00
|
|
|
/* Struct to hold HDF5-specific info for a type. */
|
|
|
|
typedef struct NC_HDF5_TYPE_INFO
|
|
|
|
{
|
2019-02-19 21:09:10 +08:00
|
|
|
hid_t hdf_typeid; /* HDF5 type identifier for the type. */
|
|
|
|
hid_t native_hdf_typeid; /* Native HDF5 type identifier for the type. */
|
2018-11-20 23:00:48 +08:00
|
|
|
} NC_HDF5_TYPE_INFO_T;
|
|
|
|
|
2020-02-17 03:59:33 +08:00
|
|
|
/* Forward declaration: this header only uses pointers to the struct
 * (see the nc4_global_filter_action() prototype), so the full
 * definition is not needed here. */
|
|
|
|
struct NC_FILTER_OBJ_HDF5;
|
|
|
|
|
2018-11-16 23:26:09 +08:00
|
|
|
/* Logging and debugging. */
|
|
|
|
/* Debugging aid. The second parameter is an HDF5 identifier and is
 * unnamed in this prototype. NOTE(review): presumably reports the
 * objects still open under that identifier, with `log` selecting the
 * output channel -- confirm against the definition. */
void reportopenobjects(int log, hid_t);
|
|
|
|
/* Takes no arguments. The explicit (void) makes this a true prototype,
 * so a call that passes arguments is diagnosed by the compiler instead
 * of silently accepted. NOTE(review): presumably adjusts HDF5 error
 * reporting to match the netCDF log level and returns a netCDF error
 * code -- confirm against the definition. */
int hdf5_set_log_level(void);
|
|
|
|
|
|
|
|
/* These functions deal with HDF5 dimension scales. */
|
2018-07-12 21:05:21 +08:00
|
|
|
/* Takes a group, a netCDF dimension id and the HDF5 id of a dimension
 * scale. NOTE(review): by its name, presumably detaches that scale from
 * the variables in `grp` and its child groups -- confirm. */
int rec_detach_scales(NC_GRP_INFO_T *grp, int dimid, hid_t dimscaleid);
|
|
|
|
/* Takes a group, a netCDF dimension id and the HDF5 id of a dimension
 * scale. NOTE(review): by its name, presumably re-attaches that scale
 * to the variables in `grp` and its child groups -- confirm. */
int rec_reattach_scales(NC_GRP_INFO_T *grp, int dimid, hid_t dimscaleid);
|
2019-01-21 00:25:04 +08:00
|
|
|
/* NOTE(review): presumably removes the HDF5 dataset that serves as the
 * dimension scale for `dim` (netCDF id `dimid`) in `grp` -- confirm
 * against the definition. */
int delete_dimscale_dataset(NC_GRP_INFO_T *grp, int dimid, NC_DIM_INFO_T *dim);
|
2018-11-16 23:26:09 +08:00
|
|
|
|
|
|
|
/* Write metadata. */
|
2018-11-13 20:27:46 +08:00
|
|
|
/* NOTE(review): the "rec" in the name suggests this writes metadata for
 * `grp` and, recursively, its subgroups. The exact meaning of
 * `bad_coord_order` is not visible in this header -- confirm. */
int nc4_rec_write_metadata(NC_GRP_INFO_T *grp, nc_bool_t bad_coord_order);
|
|
|
|
/* NOTE(review): by its name, presumably writes the groups and
 * user-defined types under `grp`, recursing into subgroups -- confirm
 * against the definition. */
int nc4_rec_write_groups_types(NC_GRP_INFO_T *grp);
|
2018-11-16 23:26:09 +08:00
|
|
|
|
|
|
|
/* Adjust the cache. */
|
2018-11-13 20:27:46 +08:00
|
|
|
/* Takes the variable and the group that holds it. NOTE(review):
 * presumably resizes the HDF5 chunk cache for `var` -- confirm against
 * the definition. */
int nc4_adjust_var_cache(NC_GRP_INFO_T *grp, NC_VAR_INFO_T * var);
|
2018-11-16 23:26:09 +08:00
|
|
|
|
|
|
|
/* Open a HDF5 dataset. `grp` and `varid` select the variable.
 * NOTE(review): the dataset id is presumably returned through
 * `dataset`, and the int result is presumably a netCDF error code --
 * confirm against the definition. */
|
|
|
|
int nc4_open_var_grp2(NC_GRP_INFO_T *grp, int varid, hid_t *dataset);
|
|
|
|
|
|
|
|
/* Find types. */
|
2018-11-13 20:27:46 +08:00
|
|
|
/* NOTE(review): presumably searches file `h5` for the type whose HDF5
 * type id matches `target_hdf_typeid` and returns its NC_TYPE_INFO_T;
 * the not-found result is not visible here -- confirm. */
NC_TYPE_INFO_T *nc4_rec_find_hdf_type(NC_FILE_INFO_T* h5,
|
|
|
|
hid_t target_hdf_typeid);
|
2018-11-16 23:26:09 +08:00
|
|
|
/* NOTE(review): presumably maps netCDF type `xtype` (with the requested
 * `endianness`) to an HDF5 type id that is returned through
 * `hdf_typeid` -- confirm against the definition. */
int nc4_get_hdf_typeid(NC_FILE_INFO_T *h5, nc_type xtype,
|
2019-02-19 21:09:10 +08:00
|
|
|
hid_t *hdf_typeid, int endianness);
|
2018-11-16 23:26:09 +08:00
|
|
|
|
|
|
|
/* Enddef and closing files. */
|
2018-11-13 20:44:39 +08:00
|
|
|
/* NOTE(review): presumably closes the HDF5 file behind `h5`; `abort`
 * presumably selects abort-style closing and `memio` is presumably used
 * for in-memory files -- confirm against the definition. */
int nc4_close_hdf5_file(NC_FILE_INFO_T *h5, int abort, NC_memio *memio);
|
2018-10-23 19:39:00 +08:00
|
|
|
/* NOTE(review): by its name, presumably releases the HDF5-specific
 * resources of `grp` and, recursively, of its subgroups -- confirm
 * against the definition. */
int nc4_rec_grp_HDF5_del(NC_GRP_INFO_T *grp);
|
2018-11-16 23:26:09 +08:00
|
|
|
/* NOTE(review): presumably takes file `h5` out of define mode --
 * confirm against the definition. */
int nc4_enddef_netcdf4_file(NC_FILE_INFO_T *h5);
|
2020-03-30 02:48:59 +08:00
|
|
|
/* NOTE(review): presumably closes the HDF5 type ids held for `type` --
 * confirm against the definition. */
int nc4_HDF5_close_type(NC_TYPE_INFO_T* type);
|
2018-09-07 05:13:09 +08:00
|
|
|
|
2018-11-13 20:44:39 +08:00
|
|
|
/* Break & reform coordinate variables */
|
|
|
|
/* NOTE(review): presumably ends the coordinate-variable relationship
 * between `coord_var` and `dim` within `grp` -- confirm against the
 * definition. */
int nc4_break_coord_var(NC_GRP_INFO_T *grp, NC_VAR_INFO_T *coord_var, NC_DIM_INFO_T *dim);
|
|
|
|
/* NOTE(review): presumably (re)establishes `coord_var` as the
 * coordinate variable of `dim` within `grp` -- confirm against the
 * definition. */
int nc4_reform_coord_var(NC_GRP_INFO_T *grp, NC_VAR_INFO_T *coord_var, NC_DIM_INFO_T *dim);
|
2018-09-07 05:13:09 +08:00
|
|
|
|
2018-09-05 01:27:47 +08:00
|
|
|
/* In-memory functions */
|
|
|
|
/* Returns an HDF5 identifier. NOTE(review): presumably sets up the
 * in-memory file image for `h5` and returns the resulting HDF5 file id
 * -- confirm against the definition. */
extern hid_t NC4_image_init(NC_FILE_INFO_T* h5);
|
|
|
|
/* Takes one untyped pointer, unnamed in this prototype.
 * NOTE(review): presumably releases the state created for an in-memory
 * file image -- confirm against the definition. */
extern void NC4_image_finalize(void*);
|
2018-09-07 05:13:09 +08:00
|
|
|
|
2019-01-28 02:06:02 +08:00
|
|
|
/* Create the HDF5 dataset for a dimension that has no coordinate
 * variable. `dim` is the dimension's metadata. NOTE(review): the int
 * result is presumably a netCDF error code -- confirm. */
|
|
|
|
extern int nc4_create_dim_wo_var(NC_DIM_INFO_T *dim);
|
|
|
|
|
2019-01-28 02:10:41 +08:00
|
|
|
/* Give a var a secret HDF5 name, for use when there is a dim of the
 * same name but the var is not a coord var of that dim. */
|
|
|
|
extern int nc4_give_var_secret_name(NC_VAR_INFO_T *var);
|
|
|
|
|
2019-01-28 02:06:02 +08:00
|
|
|
/* Get the fill value for a var. NOTE(review): the value is presumably
 * returned through `fillp`; who owns the memory it points to is not
 * visible in this header -- confirm against the definition. */
|
2018-08-09 20:41:54 +08:00
|
|
|
int nc4_get_fill_value(NC_FILE_INFO_T *h5, NC_VAR_INFO_T *var, void **fillp);
|
2018-08-07 00:49:31 +08:00
|
|
|
|
2018-11-26 22:49:58 +08:00
|
|
|
/* Find file, group, var, and att info, doing lazy reads if needed.
 * NOTE(review): `h5`, `grp`, `var` and `att` are presumably output
 * pointers, and `use_name` presumably selects lookup by `name` rather
 * than by `attnum` -- confirm against the definition. */
|
|
|
|
int nc4_hdf5_find_grp_var_att(int ncid, int varid, const char *name, int attnum,
|
2018-11-30 23:59:58 +08:00
|
|
|
int use_name, char *norm_name, NC_FILE_INFO_T **h5,
|
2018-11-26 22:49:58 +08:00
|
|
|
NC_GRP_INFO_T **grp, NC_VAR_INFO_T **var,
|
|
|
|
NC_ATT_INFO_T **att);
|
|
|
|
|
2018-12-18 22:48:22 +08:00
|
|
|
/* Find var, doing lazy var metadata read if needed. `ncid` and `varid`
 * select the variable. NOTE(review): `h5`, `grp` and `var` are
 * presumably output pointers -- confirm against the definition. */
|
|
|
|
int nc4_hdf5_find_grp_h5_var(int ncid, int varid, NC_FILE_INFO_T **h5,
|
|
|
|
NC_GRP_INFO_T **grp, NC_VAR_INFO_T **var);
|
2018-12-12 05:44:04 +08:00
|
|
|
|
2020-03-30 02:48:59 +08:00
|
|
|
/* NOTE(review): presumably closes the HDF5 resources held for `att` --
 * confirm against the definition. */
int nc4_HDF5_close_att(NC_ATT_INFO_T *att);
|
|
|
|
|
2018-12-20 00:43:32 +08:00
|
|
|
/* Perform the lazy read of the remaining metadata for a var.
 * NOTE(review): the int result is presumably a netCDF error code --
 * confirm against the definition. */
|
|
|
|
int nc4_get_var_meta(NC_VAR_INFO_T *var);
|
2018-12-12 05:44:04 +08:00
|
|
|
|
2019-05-01 04:51:25 +08:00
|
|
|
|
2019-03-22 01:33:27 +08:00
|
|
|
/* Define Filter API Function */
|
2020-02-17 03:59:33 +08:00
|
|
|
/* NOTE(review): `action` is presumably one of the FILTER_REG /
 * FILTER_UNREG / FILTER_INQ codes, applied to the filter `id` with
 * `infop` carrying the filter description -- confirm. */
int nc4_global_filter_action(int action, unsigned int id, struct NC_FILTER_OBJ_HDF5* infop);
|
|
|
|
/* NOTE(review): presumably records filter `id` on `var` with its
 * `nparams` parameters in `params`; the meaning of `active` and
 * whether `params` is copied are not visible here -- confirm. */
int NC4_hdf5_addfilter(NC_VAR_INFO_T* var, int active, unsigned int id, size_t nparams, unsigned int* params);
|
|
|
|
/* NOTE(review): presumably removes the filter identified by
 * `filterid` from `var` -- confirm against the definition. */
int NC4_hdf5_remove_filter(NC_VAR_INFO_T* var, unsigned int filterid);
|
|
|
|
|
2019-03-10 11:35:57 +08:00
|
|
|
/* Support functions for provenance info (defined in nc4hdf.c) */
|
|
|
|
/* Takes three unnamed `unsigned*` parameters. NOTE(review): presumably
 * receives the HDF5 library's major, minor and release numbers through
 * them -- confirm against the definition. */
extern int NC4_hdf5get_libversion(unsigned*,unsigned*,unsigned*);/*libsrc4/nc4hdf.c*/
|
|
|
|
/* NOTE(review): presumably stores the HDF5 superblock version of the
 * given file through the `int*` parameter -- confirm against the
 * definition. */
extern int NC4_hdf5get_superblock(struct NC_FILE_INFO*, int*);/*libsrc4/nc4hdf.c*/
|
|
|
|
/* NOTE(review): presumably returns non-zero when the given file was
 * written by netCDF-4 rather than being a plain HDF5 file -- confirm
 * against the definition. */
extern int NC4_isnetcdf4(struct NC_FILE_INFO*); /*libsrc4/nc4hdf.c*/
|
|
|
|
|
2020-02-17 03:59:33 +08:00
|
|
|
/* NOTE(review): presumably computes default chunk sizes for `var` in
 * `grp` and stores them in the variable's metadata -- confirm against
 * the definition. */
extern int nc4_find_default_chunksizes2(NC_GRP_INFO_T *grp, NC_VAR_INFO_T *var);
|
2019-03-22 01:33:27 +08:00
|
|
|
|
2018-07-12 21:05:21 +08:00
|
|
|
#endif /* _HDF5INTERNAL_ */
|