netcdf-c/include/hdf5internal.h
Dennis Heimbigner 3ffe7be446 Enhance/Fix filter support
re: Discussion https://github.com/Unidata/netcdf-c/discussions/2214

The primary change is to support so-called "standard filters".
A standard filter is one that is defined by the following
netcdf-c API:
````
int nc_def_var_XXX(int ncid, int varid, size_t nparams, unsigned* params);
int nc_inq_var_XXXX(int ncid, int varid, int* usefilterp, unsigned* params);
````
So for example, zstandard would be a standard filter by defining
the functions *nc_def_var_zstandard* and *nc_inq_var_zstandard*.

In order to define these functions, we need a new dispatch function:
````
int nc_inq_filter_avail(int ncid, unsigned filterid);
````
This function, combined with the existing filter API can be used
to implement arbitrary standard filters using a simple code pattern.
Note that I would have preferred that this function return a list
of all available filters, but HDF5 does not support that functionality.

So this PR implements the dispatch function and implements
the following standard functions:
    + bzip2
    + zstandard
    + blosc
Specific test cases are also provided for HDF5 and NCZarr.
Over time, other specific standard filters will be defined.

## Primary Changes
* Add nc_inq_filter_avail() to netcdf-c API.
* Add standard filter implementations to test use of *nc_inq_filter_avail*.
* Bump the dispatch table version number and add to all the relevant
   dispatch tables (libsrc, libsrcp, etc).
* Create a program to invoke nc_inq_filter_avail so that it is accessible
  to shell scripts.
* Cleanup szip support to properly support szip
  when HDF5 is disabled. This involves detecting
  libsz separately from testing if HDF5 supports szip.
* Integrate shuffle and fletcher32 into the existing
  filter API. This means that, for example, nc_def_var_fletcher32
  is now a wrapper around nc_def_var_filter.
* Extend the Codec defaulting to allow multiple default shared libraries.

## Misc. Changes
* Modify configure.ac/CMakeLists.txt to look for the relevant
  libraries implementing standard filters.
* Modify libnetcdf.settings to list available standard filters
  (including deflate and szip).
* Add CMake test modules to locate libbz2 and libzstd.
* Cleanup the HDF5 memory manager function use in the plugins.
* remove unused file include//ncfilter.h
* remove tests for the HDF5 memory operations e.g. H5allocate_memory.
* Add flag to ncdump to force use of _Filter instead of _Deflate
  or _Shuffle or _Fletcher32. Used for testing.
2022-03-14 12:39:37 -06:00

223 lines
8.2 KiB
C

/* Copyright 2018-2022 University Corporation for Atmospheric
Research/Unidata. */
/**
* @file This header file contains macros, types, and prototypes for
* the HDF5 code in libhdf5. This header should not be included in
* code outside libhdf5.
*
* @author Ed Hartnett
*/
#ifndef _HDF5INTERNAL_
#define _HDF5INTERNAL_
#include "config.h"
#include <hdf5.h>
#include <hdf5_hl.h>
#include "nc4internal.h"
#include "ncdimscale.h"
#include "nc4dispatch.h"
#include "hdf5dispatch.h"
#include "netcdf_filter.h"
#define NC_MAX_HDF5_NAME (NC_MAX_NAME + 10)
/* These have to do with creating chunked datasets in HDF5. */
#define NC_HDF5_UNLIMITED_DIMSIZE (0)
#define NC_HDF5_CHUNKSIZE_FACTOR (10)
#define NC_HDF5_MIN_CHUNK_SIZE (2)
#define NC_EMPTY_SCALE "NC_EMPTY_SCALE"
/* This is an attribute I had to add to handle multidimensional
* coordinate variables. See nc4internal.h:NC_ATT_COORDINATES.
* in nc4internal.h.
*/
#define COORDINATES NC_ATT_COORDINATES
#define COORDINATES_LEN (NC_MAX_NAME * 5)
/* This is used when the user defines a non-coordinate variable with
* same name as a dimension. */
#define NON_COORD_PREPEND "_nc4_non_coord_"
/* An attribute in the HDF5 root group of this name means that the
* file must follow strict netCDF classic format rules. */
#define NC3_STRICT_ATT_NAME NC_ATT_NC3_STRICT_NAME
/* If this attribute is present on a dimscale variable, use the value
* as the netCDF dimid. */
#define NC_DIMID_ATT_NAME NC_ATT_DIMID_NAME /*See nc4internal.h*/
/** This is the name of the class HDF5 dimension scale attribute. */
#define HDF5_DIMSCALE_CLASS_ATT_NAME NC_ATT_CLASS /*See nc4internal.h*/
/** This is the name of the name HDF5 dimension scale attribute. */
#define HDF5_DIMSCALE_NAME_ATT_NAME NC_ATT_NAME
/* forward */
struct NCauth;
/** Struct to hold HDF5-specific info for the file. */
typedef struct NC_HDF5_FILE_INFO {
hid_t hdfid;
#if defined(ENABLE_BYTERANGE)
int byterange;
NCURI* uri; /* Parse of the incoming path, if url */
#if defined(ENABLE_HDF5_ROS3) || defined(ENABLE_S3_SDK)
struct NCauth* auth;
#endif
#endif
} NC_HDF5_FILE_INFO_T;
/* This is a struct to handle the dim metadata. */
typedef struct NC_HDF5_DIM_INFO
{
hid_t hdf_dimscaleid; /* Non-zero if a DIM_WITHOUT_VARIABLE dataset is in use (no coord var). */
HDF5_OBJID_T hdf5_objid;
} NC_HDF5_DIM_INFO_T;
/** Strut to hold HDF5-specific info for attributes. */
typedef struct NC_HDF5_ATT_INFO
{
hid_t native_hdf_typeid; /* Native HDF5 datatype for attribute's data */
} NC_HDF5_ATT_INFO_T;
/* Struct to hold HDF5-specific info for a group. */
typedef struct NC_HDF5_GRP_INFO
{
hid_t hdf_grpid;
} NC_HDF5_GRP_INFO_T;
/* Struct to hold HDF5-specific info for a variable. */
typedef struct NC_HDF5_VAR_INFO
{
hid_t hdf_datasetid;
HDF5_OBJID_T *dimscale_hdf5_objids;
nc_bool_t dimscale; /**< True if var is a dimscale. */
nc_bool_t *dimscale_attached; /**< Array of flags that are true if dimscale is attached for that dim index. */
int flags;
# define NC_HDF5_VAR_FILTER_MISSING 1 /* if any filter is missing */
} NC_HDF5_VAR_INFO_T;
/* Struct to hold HDF5-specific info for a field. */
typedef struct NC_HDF5_FIELD_INFO
{
hid_t hdf_typeid;
hid_t native_hdf_typeid;
} NC_HDF5_FIELD_INFO_T;
/* Struct to hold HDF5-specific info for a type. */
typedef struct NC_HDF5_TYPE_INFO
{
hid_t hdf_typeid;
hid_t native_hdf_typeid;
} NC_HDF5_TYPE_INFO_T;
/* Logging and debugging. */
void reportopenobjects(int log, hid_t);
int hdf5_set_log_level();
void nc_log_hdf5(void);
/* These functions deal with HDF5 dimension scales. */
int rec_detach_scales(NC_GRP_INFO_T *grp, int dimid, hid_t dimscaleid);
int rec_reattach_scales(NC_GRP_INFO_T *grp, int dimid, hid_t dimscaleid);
int delete_dimscale_dataset(NC_GRP_INFO_T *grp, int dimid, NC_DIM_INFO_T *dim);
/* Write metadata. */
int nc4_rec_write_metadata(NC_GRP_INFO_T *grp, nc_bool_t bad_coord_order);
int nc4_rec_write_groups_types(NC_GRP_INFO_T *grp);
/* Adjust the cache. */
int nc4_adjust_var_cache(NC_GRP_INFO_T *grp, NC_VAR_INFO_T * var);
/* Open a HDF5 dataset. */
int nc4_open_var_grp2(NC_GRP_INFO_T *grp, int varid, hid_t *dataset);
/* Find types. */
NC_TYPE_INFO_T *nc4_rec_find_hdf_type(NC_FILE_INFO_T* h5,
hid_t target_hdf_typeid);
int nc4_get_hdf_typeid(NC_FILE_INFO_T *h5, nc_type xtype,
hid_t *hdf_typeid, int endianness);
/* Enddef and closing files. */
int nc4_close_hdf5_file(NC_FILE_INFO_T *h5, int abort, NC_memio *memio);
int nc4_rec_grp_HDF5_del(NC_GRP_INFO_T *grp);
int nc4_enddef_netcdf4_file(NC_FILE_INFO_T *h5);
int nc4_HDF5_close_type(NC_TYPE_INFO_T* type);
/* Break & reform coordinate variables */
int nc4_break_coord_var(NC_GRP_INFO_T *grp, NC_VAR_INFO_T *coord_var, NC_DIM_INFO_T *dim);
int nc4_reform_coord_var(NC_GRP_INFO_T *grp, NC_VAR_INFO_T *coord_var, NC_DIM_INFO_T *dim);
/* In-memory functions */
extern hid_t NC4_image_init(NC_FILE_INFO_T* h5);
extern void NC4_image_finalize(void*);
/* Create HDF5 dataset for dim without a coord var. */
extern int nc4_create_dim_wo_var(NC_DIM_INFO_T *dim);
/* Give a var a secret HDF5 name, for use when there is a dim of this
* name, but the var is not a coord var of that dim. */
extern int nc4_give_var_secret_name(NC_VAR_INFO_T *var);
/* Find file, group, var, and att info, doing lazy reads if needed. */
int nc4_hdf5_find_grp_var_att(int ncid, int varid, const char *name, int attnum,
int use_name, char *norm_name, NC_FILE_INFO_T **h5,
NC_GRP_INFO_T **grp, NC_VAR_INFO_T **var,
NC_ATT_INFO_T **att);
/* Find var, doing lazy var metadata read if needed. */
int nc4_hdf5_find_grp_h5_var(int ncid, int varid, NC_FILE_INFO_T **h5,
NC_GRP_INFO_T **grp, NC_VAR_INFO_T **var);
int nc4_HDF5_close_att(NC_ATT_INFO_T *att);
/* Perform lazy read of the rest of the metadata for a var. */
int nc4_get_var_meta(NC_VAR_INFO_T *var);
/* Get the file chunk cache settings from HDF5. */
int nc4_hdf5_get_chunk_cache(int ncid, size_t *sizep, size_t *nelemsp,
float *preemptionp);
/* Filter Dispatch Entries */
int NC4_hdf5_def_var_filter(int ncid, int varid, unsigned int filterid, size_t nparams, const unsigned int *params);
int NC4_hdf5_inq_var_filter_ids(int ncid, int varid, size_t* nfiltersp, unsigned int *filterids);
int NC4_hdf5_inq_var_filter_info(int ncid, int varid, unsigned int filterid, size_t* nparamsp, unsigned int *params);
int NC4_hdf5_inq_filter_avail(int ncid, unsigned id);
/* Filterlist management */
/* The NC_VAR_INFO_T->filters field is an NClist of this struct */
struct NC_HDF5_Filter {
int flags; /**< Flags describing state of this filter. */
# define NC_HDF5_FILTER_MISSING 1 /* Filter implementation is not accessible */
unsigned int filterid; /**< ID for arbitrary filter. */
size_t nparams; /**< nparams for arbitrary filter. */
unsigned int* params; /**< Params for arbitrary filter. */
};
int NC4_hdf5_filter_initialize(void);
int NC4_hdf5_filter_finalize(void);
int NC4_hdf5_filter_remove(NC_VAR_INFO_T* var, unsigned int id);
int NC4_hdf5_filter_lookup(NC_VAR_INFO_T* var, unsigned int id, struct NC_HDF5_Filter** fi);
int NC4_hdf5_addfilter(NC_VAR_INFO_T* var, unsigned int id, size_t nparams, const unsigned int* params, int flags);
int NC4_hdf5_filter_freelist(NC_VAR_INFO_T* var);
int NC4_hdf5_find_missing_filter(NC_VAR_INFO_T* var, unsigned int* idp);
/* Add an attribute to the attribute list. */
int nc4_put_att(NC_GRP_INFO_T* grp, int varid, const char *name, nc_type file_type,
size_t len, const void *data, nc_type mem_type, int force);
/* Support functions for provenance info (defined in nc4hdf.c) */
extern int NC4_hdf5get_libversion(unsigned*,unsigned*,unsigned*);/*libsrc4/nc4hdf.c*/
extern int NC4_hdf5get_superblock(struct NC_FILE_INFO*, int*);/*libsrc4/nc4hdf.c*/
extern int NC4_isnetcdf4(struct NC_FILE_INFO*); /*libsrc4/nc4hdf.c*/
extern int nc4_find_default_chunksizes2(NC_GRP_INFO_T *grp, NC_VAR_INFO_T *var);
EXTERNL hid_t nc4_H5Fopen(const char *filename, unsigned flags, hid_t fapl_id);
EXTERNL hid_t nc4_H5Fcreate(const char *filename, unsigned flags, hid_t fcpl_id, hid_t fapl_id);
int hdf5set_format_compatibility(hid_t fapl_id);
#endif /* _HDF5INTERNAL_ */