netcdf-c/libdispatch/dfilter.c
Dennis Heimbigner 3ffe7be446 Enhance/Fix filter support
re: Discussion https://github.com/Unidata/netcdf-c/discussions/2214

The primary change is to support so-called "standard filters".
A standard filter is one that is defined by the following
netcdf-c API:
````
int nc_def_var_XXX(int ncid, int varid, size_t nparams, unsigned* params);
int nc_inq_var_XXXX(int ncid, int varid, int* usefilterp, unsigned* params);
````
So for example, zstandard would be a standard filter by defining
the functions *nc_def_var_zstandard* and *nc_inq_var_zstandard*.

In order to define these functions, we need a new dispatch function:
````
int nc_inq_filter_avail(int ncid, unsigned filterid);
````
This function, combined with the existing filter API can be used
to implement arbitrary standard filters using a simple code pattern.
Note that I would have preferred that this function return a list
of all available filters, but HDF5 does not support that functionality.

So this PR implements the dispatch function and implements
the following standard functions:
    + bzip2
    + zstandard
    + blosc
Specific test cases are also provided for HDF5 and NCZarr.
Over time, other specific standard filters will be defined.

## Primary Changes
* Add nc_inq_filter_avail() to netcdf-c API.
* Add standard filter implementations to test use of *nc_inq_filter_avail*.
* Bump the dispatch table version number and add to all the relevant
   dispatch tables (libsrc, libsrcp, etc).
* Create a program to invoke nc_inq_filter_avail so that it is accessible
  to shell scripts.
* Cleanup szip support to properly support szip
  when HDF5 is disabled. This involves detecting
  libsz separately from testing if HDF5 supports szip.
* Integrate shuffle and fletcher32 into the existing
  filter API. This means that, for example, nc_def_var_fletcher32
  is now a wrapper around nc_def_var_filter.
* Extend the Codec defaulting to allow multiple default shared libraries.

## Misc. Changes
* Modify configure.ac/CMakeLists.txt to look for the relevant
  libraries implementing standard filters.
* Modify libnetcdf.settings to list available standard filters
  (including deflate and szip).
* Add CMake test modules to locate libbz2 and libzstd.
* Cleanup the HDF5 memory manager function use in the plugins.
* remove unused file include//ncfilter.h
* remove tests for the HDF5 memory operations e.g. H5allocate_memory.
* Add flag to ncdump to force use of _Filter instead of _Deflate
  or _Shuffle or _Fletcher32. Used for testing.
2022-03-14 12:39:37 -06:00

438 lines
12 KiB
C

/*
* Copyright 2018, University Corporation for Atmospheric Research
* See netcdf/COPYRIGHT file for copying and redistribution conditions.
*/
#include "config.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#ifdef _MSC_VER
#include <io.h>
#endif
#include "netcdf.h"
#include "netcdf_filter.h"
#include "ncdispatch.h"
#include "nc4internal.h"
#ifdef USE_HDF5
#include "hdf5internal.h"
#endif
#ifdef ENABLE_NCZARR
#include "zdispatch.h"
#endif
/*
Unified filter related code
*/
/**************************************************/
/* Per-variable filters */
/**
Find the set of filters (if any) associated with a variable.
Assumes HDF5 format using unsigned ints.
\param ncid NetCDF or group ID, from a previous call to nc_open(),
nc_create(), nc_def_grp(), or associated inquiry functions such as
nc_inq_ncid().
\param varid Variable ID
\param nfilters return no. of filters; may be zero
\param ids return the filter ids (caller allocates)
\returns ::NC_NOERR No error.
\returns ::NC_ENOTNC4 Not a netCDF-4 file.
\returns ::NC_EBADID Bad ncid
\returns ::NC_ENOTVAR Invalid variable ID.
\returns ::NC_EINVAL Invalid arguments
\ingroup variables
\author Dennis Heimbigner
*/
EXTERNL int
nc_inq_var_filter_ids(int ncid, int varid, size_t* nfiltersp, unsigned int* ids)
{
NC* ncp;
int stat = NC_check_id(ncid,&ncp);
if(stat != NC_NOERR) return stat;
TRACE(nc_inq_var_filter_ids);
if((stat = ncp->dispatch->inq_var_filter_ids(ncid,varid,nfiltersp,ids))) goto done;
done:
return stat;
}
/**
Find the the param info about filter (if any)
associated with a variable and with specified id.
Assumes HDF5 format using unsigned ints.
\param ncid NetCDF or group ID, from a previous call to nc_open(),
nc_create(), nc_def_grp(), or associated inquiry functions such as
nc_inq_ncid().
\param varid Variable ID
\param id The filter id of interest
\param nparamsp (Out) Storage which will get the number of parameters to the filter
\param params (Out) Storage which will get associated parameters.
Note: the caller must allocate and free.
\returns ::NC_NOERR No error.
\returns ::NC_ENOTNC4 Not a netCDF-4 file.
\returns ::NC_EBADID Bad ncid.
\returns ::NC_ENOTVAR Invalid variable ID.
\returns ::NC_ENOFILTER Specified filter not defined for this variable.
\ingroup variables
\author Dennis Heimbigner
*/
EXTERNL int
nc_inq_var_filter_info(int ncid, int varid, unsigned int id, size_t* nparamsp, unsigned int* params)
{
NC* ncp;
int stat = NC_check_id(ncid,&ncp);
if(stat != NC_NOERR) return stat;
TRACE(nc_inq_var_filter_info);
if((stat = ncp->dispatch->inq_var_filter_info(ncid,varid,id,nparamsp,params))) goto done;
done:
return stat;
}
/**
Define a new variable filter
Assumes HDF5 format using unsigned ints.
Only variables with chunked storage can use filters.
@param ncid File and group ID.
@param varid Variable ID.
@param id Filter ID.
@param nparams Number of filter parameters.
@param parms Filter parameters.
@return ::NC_NOERR No error.
@return ::NC_EINVAL Variable must be chunked.
@return ::NC_EBADID Bad ID.
@author Dennis Heimbigner
*/
EXTERNL int
nc_def_var_filter(int ncid, int varid, unsigned int id, size_t nparams, const unsigned int* params)
{
int stat = NC_NOERR;
NC* ncp;
int fixedsize;
nc_type xtype;
TRACE(nc_inq_var_filter);
if((stat = NC_check_id(ncid,&ncp))) return stat;
/* Get variable' type */
if((stat = nc_inq_vartype(ncid,varid,&xtype))) return stat;
/* If the variable's type is not fixed-size, then signal error */
if((stat = NC4_inq_type_fixed_size(ncid, xtype, &fixedsize))) return stat;
if(!fixedsize) return NC_EFILTER;
if((stat = ncp->dispatch->def_var_filter(ncid,varid,id,nparams,params))) goto done;
done:
return stat;
}
/**
Find the first filter (if any) associated with a variable.
Assumes HDF5 format using unsigned int.
\param ncid NetCDF or group ID, from a previous call to nc_open(),
nc_create(), nc_def_grp(), or associated inquiry functions such as
nc_inq_ncid().
\param varid Variable ID
\param idp Storage which will get the filter id; a return value of zero means variable has no filters.
\param nparamsp Storage which will get the number of parameters to the filter
\param params Storage which will get associated parameters (call allocates and frees).
This is redundant over the multi-filter API, so
it can be implemented in terms of those functions.
\returns ::NC_NOERR No error.
\returns ::NC_ENOTNC4 Not a netCDF-4 file.
\returns ::NC_EBADID Bad ncid.
\returns ::NC_ENOTVAR Invalid variable ID.
\ingroup variables
\author Dennis Heimbigner
*/
EXTERNL int
nc_inq_var_filter(int ncid, int varid, unsigned int* idp, size_t* nparamsp, unsigned int* params)
{
NC* ncp;
size_t nfilters;
unsigned int* ids = NULL;
int stat = NC_check_id(ncid,&ncp);
if(stat != NC_NOERR) return stat;
TRACE(nc_inq_var_filter);
/* Get the number of filters on this variable */
if((stat = nc_inq_var_filter_ids(ncid,varid,&nfilters, NULL))) goto done;
/* If no filters, then return zero */
if(nfilters == 0) {
if(idp) *idp = 0;
goto done;
}
/* Get the filter ids */
if((ids = calloc(sizeof(unsigned int),nfilters)) == NULL) {stat = NC_ENOMEM; goto done;}
if((stat = nc_inq_var_filter_ids(ncid,varid,&nfilters, ids))) goto done;
/* Get params for the first filter */
if((stat = nc_inq_var_filter_info(ncid,varid,ids[0],nparamsp,params))) goto done;
if(idp) *idp = ids[0];
done:
nullfree(ids);
return stat;
}
/**************************************************/
/* Test if filter is available. Would prefer
returning a list of all available filters, but HDF5
does not support that capability.
@param file for which filter list is desired
@param id filter id of interest
@return NC_NOERR if the filter is available
@return NC_EBADID if ncid is invalid
@return NC_ENOFILTER if filter is not available.
*/
EXTERNL int
nc_inq_filter_avail(int ncid, unsigned id)
{
int stat = NC_NOERR;
NC* ncp;
stat = NC_check_id(ncid,&ncp);
if(stat != NC_NOERR) return stat;
if((stat = ncp->dispatch->inq_filter_avail(ncid,id))) goto done;
done:
return stat;
}
/**************************************************/
/* Support direct user defined filters */
#ifdef ENABLE_CLIENTSIDE_FILTERS
/* Use void* to avoid having to include hdf.h*/
EXTERNL int
nc_filter_client_register(unsigned int id, void* info)
{
int stat = NC_NOERR;
#ifdef USE_HDF5
NC_FILTER_OBJ_HDF5 client;
if(id == 0 ||info == NULL)
return NC_EINVAL;
memset(&client,0,sizeof(client));
client.hdr.format = NC_FILTER_FORMAT_HDF5;
client.sort = NC_FILTER_SORT_CLIENT;
client.u.client.id = id;
client.u.client.info = info;
/* Note use of a global function, not part of the dispatch table */
stat = nc4_global_filter_action(NCFILTER_CLIENT_REG, id, &client);
#else
stat = NC_ENOTBUILT;
#endif
return stat;
}
EXTERNL int
nc_filter_client_unregister(unsigned int id)
{
int stat = NC_NOERR;
#ifdef USE_HDF5
stat = nc4_global_filter_action(NCFILTER_CLIENT_UNREG, id, NULL);
#else
stat = NC_ENOTBUILT;
#endif
return stat;
}
/* Use void* to avoid having to include hdf.h*/
EXTERNL int
nc_filter_client_inq(unsigned int id, void* infop)
{
int stat = NC_NOERR;
#ifdef USE_HDF5
H5Z_class2_t* hct = (H5Z_class2_t*)infop;
NC_FILTER_OBJ_HDF5 client;
if(id == 0 ||infop == NULL)
return NC_EINVAL;
memset(&client,0,sizeof(client));
client.hdr.format = NC_FILTER_FORMAT_HDF5;
client.sort = NC_FILTER_SORT_CLIENT;
client.u.client.id = id;
client.u.client.info = hct;
/* Note use of a global function, not part of the dispatch table */
stat = nc4_global_filter_action(NCFILTER_CLIENT_INQ, id, &client);
if(stat == NC_NOERR) {
*hct = *(H5Z_class2_t*)client.u.client.info;
}
#else
stat = NC_ENOTBUILT;
#endif
return stat;
}
#endif /*ENABLE_CLIENTSIDE_FILTERS*/
/**************************************************/
/* Functions for accessing standardized filters */
int
nc_def_var_bzip2(int ncid, int varid, int level)
{
int stat = NC_NOERR;
unsigned ulevel;
if((stat = nc_inq_filter_avail(ncid,H5Z_FILTER_BZIP2))) goto done;
/* Filter is available */
/* 1 <= Level <= 9 */
if (level < 1 || level > 9)
return NC_EINVAL;
ulevel = (unsigned) level; /* Keep bit pattern */
if((stat = nc_def_var_filter(ncid,varid,H5Z_FILTER_BZIP2,1,&ulevel))) goto done;
done:
return stat;
}
int
nc_inq_var_bzip2(int ncid, int varid, int* hasfilterp, int *levelp)
{
int stat = NC_NOERR;
size_t nparams;
unsigned params = 0;
int hasfilter = 0;
if((stat = nc_inq_filter_avail(ncid,H5Z_FILTER_BZIP2))) goto done;
/* Filter is available */
/* Get filter info */
stat = nc_inq_var_filter_info(ncid,varid,H5Z_FILTER_BZIP2,&nparams,NULL);
if(stat == NC_ENOFILTER) {stat = NC_NOERR; hasfilter = 0; goto done;}
if(stat != NC_NOERR) goto done;
hasfilter = 1;
if(nparams != 1) {stat = NC_EFILTER; goto done;}
if((stat = nc_inq_var_filter_info(ncid,varid,H5Z_FILTER_BZIP2,&nparams,&params))) goto done;
done:
if(levelp) *levelp = (int)params;
if(hasfilterp) *hasfilterp = hasfilter;
return stat;
}
int
nc_def_var_zstandard(int ncid, int varid, int level)
{
#ifdef HAVE_ZSTD
int stat = NC_NOERR;
unsigned ulevel;
if((stat = nc_inq_filter_avail(ncid,H5Z_FILTER_ZSTD))) goto done;
/* Filter is available */
/* Level must be between -131072 and 22 on Zstandard v. 1.4.5 (~202009)
Earlier versions have fewer levels (especially fewer negative levels) */
if (level < -131072 || level > 22)
return NC_EINVAL;
ulevel = (unsigned) level; /* Keep bit pattern */
if((stat = nc_def_var_filter(ncid,varid,H5Z_FILTER_ZSTD,1,&ulevel))) goto done;
done:
return stat;
#else
return NC_NOERR;
#endif /*HAVE_ZSTD*/
}
int
nc_inq_var_zstandard(int ncid, int varid, int* hasfilterp, int *levelp)
{
#ifdef HAVE_ZSTD
int stat = NC_NOERR;
size_t nparams;
unsigned params = 0;
int hasfilter = 0;
if((stat = nc_inq_filter_avail(ncid,H5Z_FILTER_ZSTD))) goto done;
/* Filter is available */
/* Get filter info */
stat = nc_inq_var_filter_info(ncid,varid,H5Z_FILTER_ZSTD,&nparams,NULL);
if(stat == NC_ENOFILTER) {stat = NC_NOERR; hasfilter = 0; goto done;}
if(stat != NC_NOERR) goto done;
hasfilter = 1;
if(nparams != 1) {stat = NC_EFILTER; goto done;}
if((stat = nc_inq_var_filter_info(ncid,varid,H5Z_FILTER_ZSTD,&nparams,&params))) goto done;
done:
if(levelp) *levelp = (int)params;
if(hasfilterp) *hasfilterp = hasfilter;
return stat;
#else
return NC_NOERR;
#endif /*HAVE_ZSTD*/
}
int
nc_def_var_blosc(int ncid, int varid, unsigned subcompressor, unsigned level, unsigned blocksize, unsigned addshuffle)
{
#ifdef HAVE_BLOSC
int stat = NC_NOERR;
unsigned params[7];;
if((stat = nc_inq_filter_avail(ncid,H5Z_FILTER_BLOSC))) goto done;
/* Filter is available */
/* Verify parameters */
if(addshuffle > (unsigned)BLOSC_BITSHUFFLE) {stat = NC_EINVAL; goto done;}
if(subcompressor > (unsigned)BLOSC_ZSTD) {stat = NC_EINVAL; goto done;}
/* Set the parameters */
params[0] = 0;
params[1] = 0;
params[2] = 0;
params[3] = blocksize;
params[4] = level;
params[5] = addshuffle;
params[6] = subcompressor;
if((stat = nc_def_var_filter(ncid,varid,H5Z_FILTER_BLOSC,7,params))) goto done;
done:
return stat;
#else
return NC_NOERR;
#endif
}
int
nc_inq_var_blosc(int ncid, int varid, int* hasfilterp, unsigned* subcompressorp, unsigned* levelp, unsigned* blocksizep, unsigned* addshufflep)
{
#ifdef HAVE_BLOSC
int stat = NC_NOERR;
size_t nparams;
unsigned params[7];
int hasfilter = 0;
if((stat = nc_inq_filter_avail(ncid,H5Z_FILTER_BLOSC))) goto done;
/* Filter is available */
/* Get filter info */
stat = nc_inq_var_filter_info(ncid,varid,H5Z_FILTER_BLOSC,&nparams,NULL);
if(stat == NC_ENOFILTER) {stat = NC_NOERR; hasfilter = 0; goto done;}
if(stat != NC_NOERR) goto done;
hasfilter = 1;
if(nparams != 7) {stat = NC_EFILTER; goto done;}
if((stat = nc_inq_var_filter_info(ncid,varid,H5Z_FILTER_BLOSC,&nparams,params))) goto done;
if(blocksizep) *blocksizep = params[3];
if(levelp) *levelp = params[4];
if(addshufflep) *addshufflep = params[5];
if(subcompressorp) *subcompressorp = params[6];
done:
if(hasfilterp) *hasfilterp = hasfilter;
return stat;
#else
return NC_NOERR;
#endif
}