2018-12-07 05:13:56 +08:00
|
|
|
/* Copyright 2018-2018 University Corporation for Atmospheric
|
2018-04-05 04:11:44 +08:00
|
|
|
Research/Unidata. */
|
|
|
|
/**
|
2018-08-07 00:16:49 +08:00
|
|
|
* @file
|
|
|
|
* @internal This header file contains macros, types and prototypes
|
|
|
|
* used to build and manipulate the netCDF metadata model.
|
2018-07-12 21:05:21 +08:00
|
|
|
*
|
|
|
|
* @author Ed Hartnett, Dennis Heimbigner, Ward Fisher
|
2019-02-19 21:10:30 +08:00
|
|
|
*/
|
2010-06-03 21:24:43 +08:00
|
|
|
|
|
|
|
#ifndef _NC4INTERNAL_
|
|
|
|
#define _NC4INTERNAL_
|
2019-11-05 05:07:50 +08:00
|
|
|
#include "netcdf.h"
|
2010-06-03 21:24:43 +08:00
|
|
|
|
2016-05-04 11:17:06 +08:00
|
|
|
#include "config.h"
|
2010-06-03 21:24:43 +08:00
|
|
|
#include <stdio.h>
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <ctype.h>
|
|
|
|
#include <string.h>
|
2016-06-09 01:26:37 +08:00
|
|
|
|
2016-05-04 11:17:06 +08:00
|
|
|
#include "nc_logging.h"
|
2018-03-17 01:46:18 +08:00
|
|
|
#include "ncindex.h"
|
2018-11-28 07:09:17 +08:00
|
|
|
#include "nc_provenance.h"
|
2020-11-20 08:01:04 +08:00
|
|
|
#include "nchashmap.h"
|
2019-11-05 05:07:50 +08:00
|
|
|
|
|
|
|
#include "netcdf_f.h"
|
|
|
|
#include "netcdf_mem.h"
|
2020-02-17 03:59:33 +08:00
|
|
|
#include "netcdf_filter.h"
|
2010-06-03 21:24:43 +08:00
|
|
|
#ifdef USE_PARALLEL
|
2016-05-04 11:17:06 +08:00
|
|
|
#include "netcdf_par.h"
|
2010-06-03 21:24:43 +08:00
|
|
|
#endif /* USE_PARALLEL */
|
|
|
|
|
2012-09-07 03:44:03 +08:00
|
|
|
/* Always needed */
|
|
|
|
#include "nc.h"
|
|
|
|
|
2019-08-22 06:04:59 +08:00
|
|
|
/** Mask selecting the file ID, which is stored in the upper 16 bits
 * of an ncid. */
#define FILE_ID_MASK (0xffff0000)

/** Mask selecting the group ID, which is stored in the lower 16 bits
 * of an ncid. */
#define GRP_ID_MASK (0x0000ffff)

/** File and group IDs are each 16 bits of the ncid; this is the shift
 * between the two halves. */
#define ID_SHIFT (16)
|
|
|
|
|
2019-08-22 06:04:59 +08:00
|
|
|
/* typedef enum {GET, PUT} NC_PG_T; */
|
|
|
|
/** These are the different objects that can be in our hash-lists.
 *
 * No explicit values are given, so the enumerators are numbered 0..7
 * in declaration order (NCNAT == 0, NCFIL == 7). */
typedef enum {
    NCNAT,
    NCVAR,
    NCDIM,
    NCATT,
    NCTYP,
    NCFLD,
    NCGRP,
    NCFIL
} NC_SORT;
|
2010-06-03 21:24:43 +08:00
|
|
|
|
2019-08-22 06:04:59 +08:00
|
|
|
/** The netCDF V2 error code. */
#define NC_V2_ERR (-1)

/** The name of the root group. */
#define NC_GROUP_NAME "/"

/** One megabyte (1024 * 1024 bytes). */
#define MEGABYTE 1048576

/** The HDF5 ID for the szip filter. */
#define HDF5_FILTER_SZIP 4
|
|
|
|
|
2019-08-22 06:04:59 +08:00
|
|
|
#define X_SCHAR_MIN (-128) /**< Minimum signed char value. */
#define X_SCHAR_MAX 127 /**< Maximum signed char value. */
#define X_UCHAR_MAX 255U /**< Maximum unsigned char value. */
#define X_SHORT_MIN (-32768) /**< Minimum short value. */
#define X_SHRT_MIN X_SHORT_MIN /**< This alias is compatible with limits.h. */
#define X_SHORT_MAX 32767 /**< Maximum short value. */
#define X_SHRT_MAX X_SHORT_MAX /**< This alias is compatible with limits.h. */
#define X_USHORT_MAX 65535U /**< Maximum unsigned short value. */
#define X_USHRT_MAX X_USHORT_MAX /**< This alias is compatible with limits.h. */
#define X_INT_MIN (-2147483647-1) /**< Minimum int value. */
#define X_INT_MAX 2147483647 /**< Maximum int value. */
#define X_LONG_MIN X_INT_MIN /**< Minimum long value (same as X_INT_MIN). */
#define X_LONG_MAX X_INT_MAX /**< Maximum long value (same as X_INT_MAX). */
#define X_UINT_MAX 4294967295U /**< Maximum unsigned int value. */
#define X_INT64_MIN (-9223372036854775807LL-1LL) /**< Minimum int64 value. */
#define X_INT64_MAX 9223372036854775807LL /**< Maximum int64 value. */
#define X_UINT64_MAX 18446744073709551615ULL /**< Maximum unsigned int64 value. */
#ifdef _WIN32 /* Windows, of course, has to be a *little* different. */
#define X_FLOAT_MAX 3.402823466e+38f /**< Maximum float value (literal as spelled for Windows). */
#else
#define X_FLOAT_MAX 3.40282347e+38f /**< Maximum float value. */
#endif /* _WIN32 */
#define X_FLOAT_MIN (-X_FLOAT_MAX) /**< Minimum (most negative) float value. */
#define X_DOUBLE_MAX 1.7976931348623157e+308 /**< Maximum double value. */
#define X_DOUBLE_MIN (-X_DOUBLE_MAX) /**< Minimum (most negative) double value. */
|
2010-06-03 21:24:43 +08:00
|
|
|
|
2018-05-25 04:27:16 +08:00
|
|
|
/** This is the number of netCDF atomic types: one more than
 * NC_MAX_ATOMIC_TYPE. */
#define NUM_ATOMIC_TYPES (NC_MAX_ATOMIC_TYPE + 1)

/** Number of parameters needed for ZLIB filter. */
#define CD_NELEMS_ZLIB 1
|
|
|
|
|
2019-08-22 08:31:37 +08:00
|
|
|
/** Get a pointer to the NC_FILE_INFO_T from dispatchdata field.
 * The argument must be a pointer to something with a dispatchdata
 * member; the stored pointer is cast to NC_FILE_INFO_T *. */
#define NC4_DATA(nc) ((NC_FILE_INFO_T *)(nc)->dispatchdata)

/** Set a pointer to the NC_FILE_INFO_T in the dispatchdata field.
 * The data pointer is stored as a void *. */
#define NC4_DATA_SET(nc,data) ((nc)->dispatchdata = (void *)(data))
|
2019-08-22 06:04:59 +08:00
|
|
|
|
|
|
|
/* Reserved attribute flags: must be powers of 2 so that they can be
 * OR'ed together. Note that the value 8 is not used here. */

/** Hidden attributes; immutable and unreadable through the API. */
#define HIDDENATTRFLAG 1

/** Readonly attributes; readable, but immutable through the API. */
#define READONLYFLAG 2

/** Subset of readonly flags; readable by name only through the API. */
#define NAMEONLYFLAG 4

/** Per-variable attribute, as opposed to global. */
#define VARFLAG 16
|
|
|
|
|
2019-08-22 08:31:37 +08:00
|
|
|
/** Boolean type, to make the code easier to read. */
typedef enum {
    NC_FALSE = 0,
    NC_TRUE = 1
} nc_bool_t;
|
|
|
|
|
2019-08-22 08:31:37 +08:00
|
|
|
/* Forward declarations, so that pointers to these structs can be used
 * before the full definitions appear. */
struct NC_GRP_INFO;
struct NC_TYPE_INFO;
struct NCRCinfo;
|
2016-05-04 11:17:06 +08:00
|
|
|
|
2019-08-22 08:31:37 +08:00
|
|
|
/**
|
|
|
|
* This struct provides indexed Access to Meta-data objects. See the
|
|
|
|
* document docs/indexing.dox for detailed information.
|
|
|
|
*
|
|
|
|
* Basically it provides a common header and use NCindex instances
|
|
|
|
* instead of linked lists.
|
|
|
|
*
|
|
|
|
* WARNING: ALL OBJECTS THAT CAN BE INSERTED INTO AN NCindex MUST HAVE
|
|
|
|
* AN INSTANCE of NC_OBJ AS THE FIRST FIELD.
|
2018-03-17 01:46:18 +08:00
|
|
|
*/
|
2019-08-22 06:04:59 +08:00
|
|
|
typedef struct NC_OBJ
|
|
|
|
{
|
2019-08-22 08:31:37 +08:00
|
|
|
NC_SORT sort; /**< Type of object. */
|
|
|
|
char* name; /**< Name, assumed to be null terminated. */
|
2023-10-26 05:14:29 +08:00
|
|
|
int id; /**< This objects ID. */
|
2018-03-17 01:46:18 +08:00
|
|
|
} NC_OBJ;
|
Refactored read_scale(), memio_new(), var_create_dataset() and makespecial()
to clean up resources properly on failure.
Refactored doubly-linked list code for objects in the libsrc4 directory,
cleaning up the add/del routines, breaking out the common next/prev
pointers into a struct and extracting the add/del operations on them,
changed the list of dims to add new dims in the same order as the other
types, made all add routines able to optionally return a pointer to the
newly created object.
Removed some dead code (pg_var(), nc4_pg_var1(), nc4_pg_varm(), misc. small
routines, etc)
Fixed fill value handling for string types in nc4_get_vara().
Changed many malloc()+strcpy() pairs into calls to strdup().
Cleaned up misc. other minor Coverity issues.
2013-12-08 17:29:26 +08:00
|
|
|
|
2019-08-22 08:31:37 +08:00
|
|
|
/**
 * This struct holds information about reserved attributes. These
 * attributes cannot be created or read by the user (through the
 * netCDF API).
 */
typedef struct NC_reservedatt
{
    const char *name; /**< Name of the reserved attribute. */
    int flags; /**< Flags that control handling of reserved attribute. */
} NC_reservedatt;
|
|
|
|
|
2019-08-22 08:31:37 +08:00
|
|
|
/** This is a struct to handle the dimension metadata. */
|
2010-06-03 21:24:43 +08:00
|
|
|
typedef struct NC_DIM_INFO
|
|
|
|
{
|
2019-08-22 08:31:37 +08:00
|
|
|
NC_OBJ hdr; /**< The hdr contains the name and ID. */
|
|
|
|
struct NC_GRP_INFO *container; /**< Pointer to containing group. */
|
|
|
|
size_t len; /**< Length of this dimension. */
|
|
|
|
nc_bool_t unlimited; /**< True if the dimension is unlimited */
|
|
|
|
nc_bool_t extended; /**< True if the dimension needs to be extended. */
|
|
|
|
nc_bool_t too_long; /**< True if len is too big to fit in local size_t. */
|
|
|
|
void *format_dim_info; /**< Pointer to format-specific dim info. */
|
|
|
|
struct NC_VAR_INFO *coord_var; /**< The coord var, if it exists. */
|
2010-06-03 21:24:43 +08:00
|
|
|
} NC_DIM_INFO_T;
|
|
|
|
|
2019-08-22 08:31:37 +08:00
|
|
|
/** This is a struct to handle the attribute metadata. */
|
2010-06-03 21:24:43 +08:00
|
|
|
typedef struct NC_ATT_INFO
|
|
|
|
{
|
2019-08-22 08:31:37 +08:00
|
|
|
NC_OBJ hdr; /**< The hdr contains the name and ID. */
|
|
|
|
struct NC_OBJ *container; /**< Pointer to containing group|var. */
|
2024-03-05 01:05:03 +08:00
|
|
|
size_t len; /**< Length of attribute data. */
|
2019-08-22 08:31:37 +08:00
|
|
|
nc_bool_t dirty; /**< True if attribute modified. */
|
|
|
|
nc_bool_t created; /**< True if attribute already created. */
|
|
|
|
nc_type nc_typeid; /**< NetCDF type of attribute's data. */
|
|
|
|
void *format_att_info; /**< Pointer to format-specific att info. */
|
|
|
|
void *data; /**< The attribute data. */
|
2010-06-03 21:24:43 +08:00
|
|
|
} NC_ATT_INFO_T;
|
|
|
|
|
2019-08-22 08:31:37 +08:00
|
|
|
/** This is a struct to handle the var metadata. */
|
2010-06-03 21:24:43 +08:00
|
|
|
typedef struct NC_VAR_INFO
|
|
|
|
{
|
2020-03-03 22:16:56 +08:00
|
|
|
NC_OBJ hdr; /**< The hdr contains the name and ID. */
|
2020-03-30 02:48:59 +08:00
|
|
|
char *alt_name; /**< Used if name in dispatcher must be different from hdr.name. */
|
2019-08-22 08:31:37 +08:00
|
|
|
struct NC_GRP_INFO *container; /**< Pointer to containing group. */
|
2020-03-03 22:16:56 +08:00
|
|
|
size_t ndims; /**< Number of dims. */
|
|
|
|
int *dimids; /**< Dim IDs. */
|
|
|
|
NC_DIM_INFO_T **dim; /**< Pointer to array of NC_DIM_INFO_T. */
|
|
|
|
nc_bool_t is_new_var; /**< True if variable is newly created. */
|
|
|
|
nc_bool_t was_coord_var; /**< True if variable was a coordinate var, but either the dim or var has been renamed. */
|
|
|
|
nc_bool_t became_coord_var; /**< True if variable _became_ a coordinate var, because either the dim or var has been renamed. */
|
|
|
|
nc_bool_t fill_val_changed; /**< True if variable's fill value changes after it has been created. */
|
|
|
|
nc_bool_t attr_dirty; /**< True if variable's attributes are dirty and should be rewritten. */
|
|
|
|
nc_bool_t created; /**< Variable has already been created (_not_ that it was just created). */
|
|
|
|
nc_bool_t written_to; /**< True if variable has data written to it. */
|
|
|
|
struct NC_TYPE_INFO *type_info; /**< Contains info about the variable type. */
|
2019-08-22 08:31:37 +08:00
|
|
|
int atts_read; /**< If true, the atts have been read. */
|
|
|
|
nc_bool_t meta_read; /**< True if this vars metadata has been completely read. */
|
|
|
|
nc_bool_t coords_read; /**< True if this var has hidden coordinates att, and it has been read. */
|
2020-03-03 22:16:56 +08:00
|
|
|
NCindex *att; /**< List of NC_ATT_INFO_T. */
|
|
|
|
nc_bool_t no_fill; /**< True if no fill value is defined for var. */
|
|
|
|
void *fill_value; /**< Pointer to fill value, or NULL. */
|
|
|
|
size_t *chunksizes; /**< For chunked storage, an array (size ndims) of chunksizes. */
|
2020-03-07 20:08:12 +08:00
|
|
|
int storage; /**< Storage of this var, compact, contiguous, or chunked. */
|
This PR adds EXPERIMENTAL support for accessing data in the
cloud using a variant of the Zarr protocol and storage
format. This enhancement is generically referred to as "NCZarr".
The data model supported by NCZarr is netcdf-4 minus the user-defined
types and the String type. In this sense it is similar to the CDF-5
data model.
More detailed information about enabling and using NCZarr is
described in the document NUG/nczarr.md and in a
[Unidata Developer's blog entry](https://www.unidata.ucar.edu/blogs/developer/en/entry/overview-of-zarr-support-in).
WARNING: this code has had limited testing, so do use this version
for production work. Also, performance improvements are ongoing.
Note especially the following platform matrix of successful tests:
Platform | Build System | S3 support
------------------------------------
Linux+gcc | Automake | yes
Linux+gcc | CMake | yes
Visual Studio | CMake | no
Additionally, and as a consequence of the addition of NCZarr,
major changes have been made to the Filter API. NOTE: NCZarr
does not yet support filters, but these changes are enablers for
that support in the future. Note that it is possible
(probable?) that there will be some accidental reversions if the
changes here did not correctly mimic the existing filter testing.
In any case, previously filter ids and parameters were of type
unsigned int. In order to support the more general zarr filter
model, this was all converted to char*. The old HDF5-specific,
unsigned int operations are still supported but they are
wrappers around the new, char* based nc_filterx_XXX functions.
This entailed at least the following changes:
1. Added the files libdispatch/dfilterx.c and include/ncfilter.h
2. Some filterx utilities have been moved to libdispatch/daux.c
3. A new entry, "filter_actions" was added to the NCDispatch table
and the version bumped.
4. An overly complex set of structs was created to support funnelling
all of the filterx operations thru a single dispatch
"filter_actions" entry.
5. Move common code to from libhdf5 to libsrc4 so that it is accessible
to nczarr.
Changes directly related to Zarr:
1. Modified CMakeList.txt and configure.ac to support both C and C++
-- this is in support of S3 support via the awd-sdk libraries.
2. Define a size64_t type to support nczarr.
3. More reworking of libdispatch/dinfermodel.c to
support zarr and to regularize the structure of the fragments
section of a URL.
Changes not directly related to Zarr:
1. Make client-side filter registration be conditional, with default off.
2. Hack include/nc4internal.h to make some flags added by Ed be unique:
e.g. NC_CREAT, NC_INDEF, etc.
3. cleanup include/nchttp.h and libdispatch/dhttp.c.
4. Misc. changes to support compiling under Visual Studio including:
* Better testing under windows for dirent.h and opendir and closedir.
5. Misc. changes to the oc2 code to support various libcurl CURLOPT flags
and to centralize error reporting.
6. By default, suppress the vlen tests that have unfixed memory leaks; add option to enable them.
7. Make part of the nc_test/test_byterange.sh test be contingent on remotetest.unidata.ucar.edu being accessible.
Changes Left TO-DO:
1. fix provenance code, it is too HDF5 specific.
2020-06-29 08:02:47 +08:00
|
|
|
int endianness; /**< What endianness for the var? */
|
2020-03-03 22:16:56 +08:00
|
|
|
int parallel_access; /**< Type of parallel access for I/O on variable (collective or independent). */
|
2022-01-30 06:27:52 +08:00
|
|
|
struct ChunkCache {
|
|
|
|
size_t size; /**< Size in bytes of the var chunk cache. */
|
|
|
|
size_t nelems; /**< Number of slots in var chunk cache. */
|
|
|
|
float preemption; /**< Chunk cache preemtion policy. */
|
|
|
|
} chunkcache;
|
2021-08-24 14:45:38 +08:00
|
|
|
int quantize_mode; /**< Quantize mode. NC_NOQUANTIZE is 0, and means no quantization. */
|
|
|
|
int nsd; /**< Number of significant digits if quantization is used, 0 if not. */
|
2019-08-22 08:31:37 +08:00
|
|
|
void *format_var_info; /**< Pointer to any binary format info. */
|
2020-09-28 02:43:46 +08:00
|
|
|
void* filters; /**< Record of the list of filters to be applied to var data; format dependent */
|
2010-06-03 21:24:43 +08:00
|
|
|
} NC_VAR_INFO_T;
|
|
|
|
|
2019-08-22 08:31:37 +08:00
|
|
|
/** This is a struct to handle the field metadata from a user-defined
|
|
|
|
* type. */
|
2010-06-03 21:24:43 +08:00
|
|
|
typedef struct NC_FIELD_INFO
|
|
|
|
{
|
2019-08-22 08:31:37 +08:00
|
|
|
NC_OBJ hdr; /**< The hdr contains the name and ID. */
|
|
|
|
nc_type nc_typeid; /**< The type of this field. */
|
|
|
|
size_t offset; /**< Offset in bytes of field. */
|
|
|
|
int ndims; /**< Number of dims. */
|
|
|
|
int *dim_size; /**< Dim sizes. */
|
|
|
|
void *format_field_info; /**< Pointer to any binary format info for field. */
|
2010-06-03 21:24:43 +08:00
|
|
|
} NC_FIELD_INFO_T;
|
|
|
|
|
2019-08-22 08:31:37 +08:00
|
|
|
/** This is a struct to handle metadata for one member of a
 * user-defined enum type. */
typedef struct NC_ENUM_MEMBER_INFO
{
    char *name; /**< Name of member. */
    void *value; /**< Value of member. */
} NC_ENUM_MEMBER_INFO_T;
|
|
|
|
|
2019-08-22 08:31:37 +08:00
|
|
|
/** This is a struct to handle metadata for a user-defined type. */
|
2010-06-03 21:24:43 +08:00
|
|
|
typedef struct NC_TYPE_INFO
|
|
|
|
{
|
2019-08-22 08:31:37 +08:00
|
|
|
NC_OBJ hdr; /**< The hdr contains the name and ID. */
|
|
|
|
struct NC_GRP_INFO *container; /**< Containing group */
|
|
|
|
unsigned rc; /**< Ref. count of objects using this type */
|
|
|
|
int endianness; /**< What endianness for the type? */
|
|
|
|
size_t size; /**< Size of the type in memory, in bytes */
|
|
|
|
nc_bool_t committed; /**< True when datatype is committed in the file */
|
|
|
|
nc_type nc_type_class; /**< NC_VLEN, NC_COMPOUND, NC_OPAQUE, NC_ENUM, NC_INT, NC_FLOAT, or NC_STRING. */
|
|
|
|
void *format_type_info; /**< HDF5-specific type info. */
|
Improve performance of the nc_reclaim_data and nc_copy_data functions.
re: Issue https://github.com/Unidata/netcdf-c/issues/2685
re: PR https://github.com/Unidata/netcdf-c/pull/2179
As noted in PR https://github.com/Unidata/netcdf-c/pull/2179,
the old code did not allow for reclaiming instances of types,
nor for properly copying them. That PR provided new functions
capable of reclaiming/copying instances of arbitrary types.
However, as noted by Issue https://github.com/Unidata/netcdf-c/issues/2685, using these
most general functions resulted in a significant performance
degradation, even for common cases.
This PR attempts to mitigate the cost of using the general
reclaim/copy functions in two ways.
First, the previous functions operating at the top level by
using ncid and typeid arguments. These functions were augmented
with equivalent versions that used the netcdf-c library internal
data structures to allow direct access to needed information.
These new functions are used internally to the library.
The second mitigation involves optimizing the internal functions
by providing early tests for common cases. This avoids
unnecessary recursive function calls.
The overall result is a significant improvement in speed by a
factor of roughly twenty -- your mileage may vary. These
optimized functions are still not as fast as the original (more
limited) functions, but they are getting close. Additional optimizations are
possible. But the cost is a significant "uglification" of the
code that I deemed a step too far, at least for now.
## Misc. Changes
1. Added a test case to check the proper reclamation/copy of complex types.
2. Found and fixed some places where nc_reclaim/copy should have been used.
3. Replaced, in the netcdf-c library, (almost all) occurrences of nc_reclaim_copy with calls to NC_reclaim/copy. This plus the optimizations is the primary speed-up mechanism.
4. In DAP4, the metadata is held in a substrate in-memory file; this required some changes so that the reclaim/copy code accessed that substrate dispatcher rather than the DAP4 dispatcher.
5. Re-factored and isolated the code that computes if a type is (transitively) variable-sized or not.
6. Clean up the reclamation code in ncgen; adding the use of nc_reclaim exposed some memory problems.
2023-05-21 07:11:25 +08:00
|
|
|
int varsized; /**< <! 1 if this type is (recursively) variable sized; 0 if fixed size */
|
2019-08-22 08:31:37 +08:00
|
|
|
|
|
|
|
/** Information for each type or class */
|
2019-02-19 21:10:30 +08:00
|
|
|
union {
|
|
|
|
struct {
|
2019-08-22 08:31:37 +08:00
|
|
|
NClist* enum_member; /**< <! NClist<NC_ENUM_MEMBER_INFO_T*> */
|
|
|
|
nc_type base_nc_typeid; /**< Typeid of the base type. */
|
|
|
|
} e; /**< Enum */
|
2019-02-19 21:10:30 +08:00
|
|
|
struct Fields {
|
2019-08-22 08:31:37 +08:00
|
|
|
NClist* field; /**< <! NClist<NC_FIELD_INFO_T*> */
|
|
|
|
} c; /**< Compound */
|
2019-02-19 21:10:30 +08:00
|
|
|
struct {
|
2019-08-22 08:31:37 +08:00
|
|
|
nc_type base_nc_typeid; /**< Typeid of the base type. */
|
|
|
|
} v; /**< Variable-length. */
|
|
|
|
} u; /**< Union of structs, for each type/class. */
|
2010-06-03 21:24:43 +08:00
|
|
|
} NC_TYPE_INFO_T;
|
|
|
|
|
2019-08-22 08:31:37 +08:00
|
|
|
/** This holds information for one group. Groups reproduce with
|
2010-06-03 21:24:43 +08:00
|
|
|
* parthenogenesis. */
|
|
|
|
typedef struct NC_GRP_INFO
|
|
|
|
{
|
2019-08-22 08:31:37 +08:00
|
|
|
NC_OBJ hdr; /**< The hdr contains the name and ID. */
|
|
|
|
void *format_grp_info; /**< Pointer to binary format info for group. */
|
|
|
|
struct NC_FILE_INFO *nc4_info; /**< Pointer containing NC_FILE_INFO_T. */
|
|
|
|
struct NC_GRP_INFO *parent; /**< Pointer tp parent group. */
|
|
|
|
int atts_read; /**< True if atts have been read for this group. */
|
|
|
|
NCindex* children; /**< NCindex<struct NC_GRP_INFO*> */
|
|
|
|
NCindex* dim; /**< NCindex<NC_DIM_INFO_T> * */
|
|
|
|
NCindex* att; /**< NCindex<NC_ATT_INFO_T> * */
|
|
|
|
NCindex* type; /**< NCindex<NC_TYPE_INFO_T> * */
|
2019-02-19 21:10:30 +08:00
|
|
|
/* Note that this is the list of vars with position == varid */
|
2019-08-22 08:31:37 +08:00
|
|
|
NCindex* vars; /**< NCindex<NC_VAR_INFO_T> * */
|
2010-06-03 21:24:43 +08:00
|
|
|
} NC_GRP_INFO_T;
|
|
|
|
|
2022-01-12 10:05:46 +08:00
|
|
|
/* These constants apply to the flags field in the
 * NC_FILE_INFO_T defined below. */
#define NC_INDEF 0x01 /**< in define mode, cleared by ncendef */
|
2019-08-22 08:31:37 +08:00
|
|
|
|
|
|
|
/** This is the metadata we need to keep track of for each
|
Fix various problem around VLEN's
re: https://github.com/Unidata/netcdf-c/issues/541
re: https://github.com/Unidata/netcdf-c/issues/1208
re: https://github.com/Unidata/netcdf-c/issues/2078
re: https://github.com/Unidata/netcdf-c/issues/2041
re: https://github.com/Unidata/netcdf-c/issues/2143
For a long time, there have been known problems with the
management of complex types containing VLENs. This also
involves the string type because it is stored as a VLEN of
chars.
This PR (mostly) fixes this problem. But note that it adds new
functions to netcdf.h (see below) and this may require bumping
the .so number. These new functions can be removed, if desired,
in favor of functions in netcdf_aux.h, but netcdf.h seems the
better place for them because they are intended as alternatives
to the nc_free_vlen and nc_free_string functions already in
netcdf.h.
The term complex type refers to any type that directly or
transitively references a VLEN type. So an array of VLENS, a
compound with a VLEN field, and so on.
In order to properly handle instances of these complex types, it
is necessary to have functions that can recursively walk
instances of such types to perform various actions on them. The
term "deep" is also used to mean recursive.
At the moment, the two operations needed by the netcdf library are:
* free'ing an instance of the complex type
* copying an instance of the complex type.
The current library does only shallow free and shallow copy of
complex types. This means that only the top level is properly
free'd or copied, but deep internal blocks in the instance are
not touched.
Note that the term "vector" will be used to mean a contiguous (in
memory) sequence of instances of some type. Given an array with,
say, dimensions 2 X 3 X 4, this will be stored in memory as a
vector of length 2*3*4=24 instances.
The use cases are primarily these.
## nc_get_vars
Suppose one is reading a vector of instances using nc_get_vars
(or nc_get_vara or nc_get_var, etc.). These functions will
return the vector in the top-level memory provided. All
interior blocks (from nested VLEN or strings) will have been
dynamically allocated.
After using this vector of instances, it is necessary to free
(aka reclaim) the dynamically allocated memory, otherwise a
memory leak occurs. So, the recursive reclaim function is used
to walk the returned instance vector and do a deep reclaim of
the data.
Currently functions are defined in netcdf.h that are supposed to
handle this: nc_free_vlen(), nc_free_vlens(), and
nc_free_string(). Unfortunately, these functions only do a
shallow free, so deeply nested instances are not properly
handled by them.
Note that internally, the provided data is immediately written so
there is no need to copy it. But the caller may need to reclaim the
data it passed into the function.
## nc_put_att
Suppose one is writing a vector of instances as the data of an attribute
using, say, nc_put_att.
Internally, the incoming attribute data must be copied and stored
so that changes/reclamation of the input data will not affect
the attribute.
Again, the code inside the netcdf library does only shallow copying
rather than deep copy. As a result, one sees effects such as described
in Github Issue https://github.com/Unidata/netcdf-c/issues/2143.
Also, after defining the attribute, it may be necessary for the user
to free the data that was provided as input to nc_put_att().
## nc_get_att
Suppose one is reading a vector of instances as the data of an attribute
using, say, nc_get_att.
Internally, the existing attribute data must be copied and returned
to the caller, and the caller is responsible for reclaiming
the returned data.
Again, the code inside the netcdf library does only shallow copying
rather than deep copy. So this can lead to memory leaks and errors
because the deep data is shared between the library and the user.
# Solution
The solution is to build properly recursive reclaim and copy
functions and use those as needed.
These recursive functions are defined in libdispatch/dinstance.c
and their signatures are defined in include/netcdf.h.
For back compatibility, corresponding "ncaux_XXX" functions
are defined in include/netcdf_aux.h.
````
int nc_reclaim_data(int ncid, nc_type xtypeid, void* memory, size_t count);
int nc_reclaim_data_all(int ncid, nc_type xtypeid, void* memory, size_t count);
int nc_copy_data(int ncid, nc_type xtypeid, const void* memory, size_t count, void* copy);
int nc_copy_data_all(int ncid, nc_type xtypeid, const void* memory, size_t count, void** copyp);
````
There are two variants. The first two, nc_reclaim_data() and
nc_copy_data(), assume the top-level vector is managed by the
caller. For reclaim, this is so the user can use, for example, a
statically allocated vector. For copy, it assumes the user
provides the space into which the copy is stored.
The second two, nc_reclaim_data_all() and
nc_copy_data_all(), allows the functions to manage the
top-level. So for nc_reclaim_data_all, the top level is
assumed to be dynamically allocated and will be free'd by
nc_reclaim_data_all(). The nc_copy_data_all() function
will allocate the top level and return a pointer to it to the
user. The user can later pass that pointer to
nc_reclaim_data_all() to reclaim the instance(s).
# Internal Changes
The netcdf-c library internals are changed to use the proper
reclaim and copy functions. It turns out that the places where
these functions are needed is quite pervasive in the netcdf-c
library code. Using these functions also allows some
simplification of the code since the stdata and vldata fields of
NC_ATT_INFO are no longer needed. Currently this is commented
out using the SEPDATA \#define macro. When any bugs are largely
fixed, all this code will be removed.
# Known Bugs
1. There is still one known failure that has not been solved.
All the failures revolve around some variant of this .cdl file.
The proximate cause of failure is the use of a VLEN FillValue.
````
netcdf x {
types:
float(*) row_of_floats ;
dimensions:
m = 5 ;
variables:
row_of_floats ragged_array(m) ;
row_of_floats ragged_array:_FillValue = {-999} ;
data:
ragged_array = {10, 11, 12, 13, 14}, {20, 21, 22, 23}, {30, 31, 32},
{40, 41}, _ ;
}
````
When a solution is found, I will either add it to this PR or post a new PR.
# Related Changes
* Mark nc_free_vlen(s) as deprecated in favor of ncaux_reclaim_data.
* Remove the --enable-unfixed-memory-leaks option.
* Remove the NC_VLENS_NOTEST code that suppresses some vlen tests.
* Document this change in docs/internal.md
* Disable the tst_vlen_data test in ncdump/tst_nccopy4.sh.
* Mark types as fixed size or not (transitively) to optimize the reclaim
and copy functions.
# Misc. Changes
* Make Doxygen process libdispatch/daux.c
* Make sure the NC_ATT_INFO_T.container field is set.
2022-01-09 09:30:00 +08:00
|
|
|
* netcdf-4/ file; used by libhdf5, libnczarr, and libdap4 */
|
This PR adds EXPERIMENTAL support for accessing data in the
cloud using a variant of the Zarr protocol and storage
format. This enhancement is generically referred to as "NCZarr".
The data model supported by NCZarr is netcdf-4 minus the user-defined
types and the String type. In this sense it is similar to the CDF-5
data model.
More detailed information about enabling and using NCZarr is
described in the document NUG/nczarr.md and in a
[Unidata Developer's blog entry](https://www.unidata.ucar.edu/blogs/developer/en/entry/overview-of-zarr-support-in).
WARNING: this code has had limited testing, so do not use this version
for production work. Also, performance improvements are ongoing.
Note especially the following platform matrix of successful tests:
Platform | Build System | S3 support
------------------------------------
Linux+gcc | Automake | yes
Linux+gcc | CMake | yes
Visual Studio | CMake | no
Additionally, and as a consequence of the addition of NCZarr,
major changes have been made to the Filter API. NOTE: NCZarr
does not yet support filters, but these changes are enablers for
that support in the future. Note that it is possible
(probable?) that there will be some accidental reversions if the
changes here did not correctly mimic the existing filter testing.
In any case, previously filter ids and parameters were of type
unsigned int. In order to support the more general zarr filter
model, this was all converted to char*. The old HDF5-specific,
unsigned int operations are still supported but they are
wrappers around the new, char* based nc_filterx_XXX functions.
This entailed at least the following changes:
1. Added the files libdispatch/dfilterx.c and include/ncfilter.h
2. Some filterx utilities have been moved to libdispatch/daux.c
3. A new entry, "filter_actions" was added to the NCDispatch table
and the version bumped.
4. An overly complex set of structs was created to support funnelling
all of the filterx operations thru a single dispatch
"filter_actions" entry.
5. Move common code from libhdf5 to libsrc4 so that it is accessible
to nczarr.
Changes directly related to Zarr:
1. Modified CMakeLists.txt and configure.ac to support both C and C++
-- this is in support of S3 access via the aws-sdk libraries.
2. Define a size64_t type to support nczarr.
3. More reworking of libdispatch/dinfermodel.c to
support zarr and to regularize the structure of the fragments
section of a URL.
Changes not directly related to Zarr:
1. Make client-side filter registration be conditional, with default off.
2. Hack include/nc4internal.h to make some flags added by Ed be unique:
e.g. NC_CREAT, NC_INDEF, etc.
3. cleanup include/nchttp.h and libdispatch/dhttp.c.
4. Misc. changes to support compiling under Visual Studio including:
* Better testing under windows for dirent.h and opendir and closedir.
5. Misc. changes to the oc2 code to support various libcurl CURLOPT flags
and to centralize error reporting.
6. By default, suppress the vlen tests that have unfixed memory leaks; add option to enable them.
7. Make part of the nc_test/test_byterange.sh test be contingent on remotetest.unidata.ucar.edu being accessible.
Changes Left TO-DO:
1. fix provenance code, it is too HDF5 specific.
2020-06-29 08:02:47 +08:00
|
|
|
|
2022-01-12 10:05:46 +08:00
|
|
|
typedef struct NC_FILE_INFO
|
2010-06-03 21:24:43 +08:00
|
|
|
{
|
2020-03-30 02:48:59 +08:00
|
|
|
NC_OBJ hdr;
|
2019-08-22 08:31:37 +08:00
|
|
|
NC *controller; /**< Pointer to containing NC. */
|
2023-02-28 07:07:36 +08:00
|
|
|
#ifdef USE_PARALLEL
|
2019-08-22 08:31:37 +08:00
|
|
|
MPI_Comm comm; /**< Copy of MPI Communicator used to open the file. */
|
|
|
|
MPI_Info info; /**< Copy of MPI Information Object used to open the file. */
|
2013-08-19 09:45:17 +08:00
|
|
|
#endif
|
2022-01-12 10:05:46 +08:00
|
|
|
int cmode; /**< Create/Open mode for the file. */
|
|
|
|
int flags; /**< State transition flags . */
|
2019-08-22 08:31:37 +08:00
|
|
|
nc_bool_t parallel; /**< True if file is open for parallel access */
|
|
|
|
nc_bool_t redef; /**< True if redefining an existing file */
|
2021-08-10 22:56:36 +08:00
|
|
|
nc_bool_t no_attr_create_order; /**< True if the creation order tracking of attributes is disabled (netcdf-4 only) */
|
2021-12-03 06:08:03 +08:00
|
|
|
nc_bool_t no_dimscale_attach; /**< True if attaching dimscales to variables is disabled (netcdf-4 only) */
|
2019-08-22 08:31:37 +08:00
|
|
|
int fill_mode; /**< Fill mode for vars - Unused internally currently */
|
|
|
|
nc_bool_t no_write; /**< true if nc_open has mode NC_NOWRITE. */
|
|
|
|
NC_GRP_INFO_T *root_grp; /**< Pointer to root group. */
|
|
|
|
short next_nc_grpid; /**< Next available group ID. */
|
|
|
|
int next_typeid; /**< Next available type ID. */
|
|
|
|
int next_dimid; /**< Next available dim ID. */
|
|
|
|
/* Provide convenience vectors indexed by the object id. This
|
|
|
|
allows for direct conversion of e.g. an nc_type to the
|
|
|
|
corresponding NC_TYPE_INFO_T object. */
|
|
|
|
NClist *alldims; /**< List of all dims. */
|
|
|
|
NClist *alltypes; /**< List of all types. */
|
|
|
|
NClist *allgroups; /**< List of all groups, including root group. */
|
|
|
|
void *format_file_info; /**< Pointer to binary format info for file. */
|
|
|
|
NC4_Provenance provenance; /**< File provenence info. */
|
|
|
|
struct NC4_Memio
|
|
|
|
{
|
|
|
|
NC_memio memio; /**< What we sent to image_init and what comes back. */
|
|
|
|
int locked; /**< Do not copy and do not free. */
|
|
|
|
int persist; /**< Should file be persisted out on close? */
|
|
|
|
int inmemory; /**< NC_INMEMORY flag was set. */
|
|
|
|
int diskless; /**< NC_DISKLESS flag was set => inmemory. */
|
|
|
|
int created; /**< 1 => create, 0 => open. */
|
|
|
|
unsigned int imageflags; /**< for H5LTopen_file_image. */
|
|
|
|
size_t initialsize; /**< Initial size. */
|
|
|
|
void *udata; /**< Extra memory allocated in NC4_image_init. */
|
2019-02-19 21:10:30 +08:00
|
|
|
} mem;
|
2018-06-22 21:08:09 +08:00
|
|
|
} NC_FILE_INFO_T;
|
2010-06-03 21:24:43 +08:00
|
|
|
|
2022-01-30 06:27:52 +08:00
|
|
|
/* Collect global state info in one place */
|
|
|
|
typedef struct NCglobalstate {
|
|
|
|
int initialized;
|
|
|
|
char* tempdir; /* track a usable temp dir */
|
|
|
|
char* home; /* track $HOME */
|
|
|
|
char* cwd; /* track getcwd */
|
|
|
|
struct NCRCinfo* rcinfo; /* Currently only one rc file per session */
|
|
|
|
struct GlobalZarr { /* Zarr specific parameters */
|
|
|
|
char dimension_separator;
|
|
|
|
} zarr;
|
2023-12-03 12:03:59 +08:00
|
|
|
struct GlobalAWS { /* AWS S3 specific parameters/defaults */
|
|
|
|
char* default_region;
|
|
|
|
char* config_file;
|
|
|
|
char* profile;
|
|
|
|
char* access_key_id;
|
|
|
|
char* secret_access_key;
|
|
|
|
} aws;
|
2022-01-30 06:27:52 +08:00
|
|
|
struct Alignment { /* H5Pset_alignment parameters */
|
|
|
|
int defined; /* 1 => threshold and alignment explicitly set */
|
|
|
|
int threshold;
|
|
|
|
int alignment;
|
|
|
|
} alignment;
|
|
|
|
struct ChunkCache chunkcache;
|
|
|
|
} NCglobalstate;
|
|
|
|
|
2019-08-22 08:31:37 +08:00
|
|
|
/** Variable Length Datatype struct in memory. Must be identical to
 * HDF5 hvl_t. (This is only used for VL sequences, not VL strings,
 * which are stored in char *'s)
 *
 * Because the layout has to match hvl_t, do not reorder or add
 * members here. */
typedef struct
{
    size_t len; /**< Length of VL data (in base type units) */
    void *p; /**< Pointer to VL data */
} nc_hvl_t;
|
|
|
|
|
This PR adds EXPERIMENTAL support for accessing data in the
cloud using a variant of the Zarr protocol and storage
format. This enhancement is generically referred to as "NCZarr".
The data model supported by NCZarr is netcdf-4 minus the user-defined
types and the String type. In this sense it is similar to the CDF-5
data model.
More detailed information about enabling and using NCZarr is
described in the document NUG/nczarr.md and in a
[Unidata Developer's blog entry](https://www.unidata.ucar.edu/blogs/developer/en/entry/overview-of-zarr-support-in).
WARNING: this code has had limited testing, so do not use this version
for production work. Also, performance improvements are ongoing.
Note especially the following platform matrix of successful tests:
Platform | Build System | S3 support
------------------------------------
Linux+gcc | Automake | yes
Linux+gcc | CMake | yes
Visual Studio | CMake | no
Additionally, and as a consequence of the addition of NCZarr,
major changes have been made to the Filter API. NOTE: NCZarr
does not yet support filters, but these changes are enablers for
that support in the future. Note that it is possible
(probable?) that there will be some accidental reversions if the
changes here did not correctly mimic the existing filter testing.
In any case, previously filter ids and parameters were of type
unsigned int. In order to support the more general zarr filter
model, this was all converted to char*. The old HDF5-specific,
unsigned int operations are still supported but they are
wrappers around the new, char* based nc_filterx_XXX functions.
This entailed at least the following changes:
1. Added the files libdispatch/dfilterx.c and include/ncfilter.h
2. Some filterx utilities have been moved to libdispatch/daux.c
3. A new entry, "filter_actions" was added to the NCDispatch table
and the version bumped.
4. An overly complex set of structs was created to support funnelling
all of the filterx operations thru a single dispatch
"filter_actions" entry.
5. Move common code from libhdf5 to libsrc4 so that it is accessible
to nczarr.
Changes directly related to Zarr:
1. Modified CMakeLists.txt and configure.ac to support both C and C++
-- this is in support of S3 access via the aws-sdk libraries.
2. Define a size64_t type to support nczarr.
3. More reworking of libdispatch/dinfermodel.c to
support zarr and to regularize the structure of the fragments
section of a URL.
Changes not directly related to Zarr:
1. Make client-side filter registration be conditional, with default off.
2. Hack include/nc4internal.h to make some flags added by Ed be unique:
e.g. NC_CREAT, NC_INDEF, etc.
3. cleanup include/nchttp.h and libdispatch/dhttp.c.
4. Misc. changes to support compiling under Visual Studio including:
* Better testing under windows for dirent.h and opendir and closedir.
5. Misc. changes to the oc2 code to support various libcurl CURLOPT flags
and to centralize error reporting.
6. By default, suppress the vlen tests that have unfixed memory leaks; add option to enable them.
7. Make part of the nc_test/test_byterange.sh test be contingent on remotetest.unidata.ucar.edu being accessible.
Changes Left TO-DO:
1. fix provenance code, it is too HDF5 specific.
2020-06-29 08:02:47 +08:00
|
|
|
/* Misc functions */
|
2020-08-18 09:15:47 +08:00
|
|
|
extern int NC4_inq_atomic_type(nc_type typeid1, char *name, size_t *size);
|
|
|
|
extern int NC4_lookup_atomic_type(const char *name, nc_type* idp, size_t *sizep);
|
2016-03-09 00:41:24 +08:00
|
|
|
|
2015-08-20 17:42:05 +08:00
|
|
|
/* These functions convert between netcdf and HDF5 types. */
|
2020-08-18 09:15:47 +08:00
|
|
|
extern int nc4_get_typelen_mem(NC_FILE_INFO_T *h5, nc_type xtype, size_t *len);
|
|
|
|
extern int nc4_convert_type(const void *src, void *dest, const nc_type src_type,
|
2021-08-25 15:31:26 +08:00
|
|
|
const nc_type dest_type, const size_t len, int *range_error,
|
|
|
|
const void *fill_value, int strict_nc3, int quantize_mode,
|
|
|
|
int nsd);
|
2010-06-03 21:24:43 +08:00
|
|
|
|
|
|
|
/* These functions do HDF5 things. */
|
2020-08-18 09:15:47 +08:00
|
|
|
extern int nc4_reopen_dataset(NC_GRP_INFO_T *grp, NC_VAR_INFO_T *var);
|
|
|
|
extern int nc4_read_atts(NC_GRP_INFO_T *grp, NC_VAR_INFO_T *var);
|
2010-06-03 21:24:43 +08:00
|
|
|
|
2018-08-22 21:03:37 +08:00
|
|
|
/* Find items in the in-memory lists of metadata. */
|
2020-08-18 09:15:47 +08:00
|
|
|
extern int nc4_find_nc_grp_h5(int ncid, NC **nc, NC_GRP_INFO_T **grp,
|
2018-08-22 21:03:37 +08:00
|
|
|
NC_FILE_INFO_T **h5);
|
2020-08-18 09:15:47 +08:00
|
|
|
extern int nc4_find_grp_h5(int ncid, NC_GRP_INFO_T **grp, NC_FILE_INFO_T **h5);
|
|
|
|
extern int nc4_find_nc4_grp(int ncid, NC_GRP_INFO_T **grp);
|
|
|
|
extern int nc4_find_dim(NC_GRP_INFO_T *grp, int dimid, NC_DIM_INFO_T **dim,
|
2018-08-22 21:03:37 +08:00
|
|
|
NC_GRP_INFO_T **dim_grp);
|
2020-08-18 09:15:47 +08:00
|
|
|
extern int nc4_find_var(NC_GRP_INFO_T *grp, const char *name, NC_VAR_INFO_T **var);
|
|
|
|
extern int nc4_find_dim_len(NC_GRP_INFO_T *grp, int dimid, size_t **len);
|
|
|
|
extern int nc4_find_type(const NC_FILE_INFO_T *h5, int typeid1, NC_TYPE_INFO_T **type);
|
|
|
|
extern NC_TYPE_INFO_T *nc4_rec_find_named_type(NC_GRP_INFO_T *start_grp, char *name);
|
|
|
|
extern NC_TYPE_INFO_T *nc4_rec_find_equal_type(NC_GRP_INFO_T *start_grp, int ncid1,
|
2018-08-22 21:03:37 +08:00
|
|
|
NC_TYPE_INFO_T *type);
|
2020-08-18 09:15:47 +08:00
|
|
|
extern int nc4_find_nc_att(int ncid, int varid, const char *name, int attnum,
|
2019-02-19 21:10:30 +08:00
|
|
|
NC_ATT_INFO_T **att);
|
2020-08-18 09:15:47 +08:00
|
|
|
extern int nc4_find_grp_h5_var(int ncid, int varid, NC_FILE_INFO_T **h5,
|
2018-08-22 21:03:37 +08:00
|
|
|
NC_GRP_INFO_T **grp, NC_VAR_INFO_T **var);
|
2020-08-18 09:15:47 +08:00
|
|
|
extern int nc4_find_grp_att(NC_GRP_INFO_T *grp, int varid, const char *name,
|
2018-08-22 21:03:37 +08:00
|
|
|
int attnum, NC_ATT_INFO_T **att);
|
2020-08-18 09:15:47 +08:00
|
|
|
extern int nc4_get_typeclass(const NC_FILE_INFO_T *h5, nc_type xtype,
|
2014-02-12 07:12:08 +08:00
|
|
|
int *type_class);
|
2010-06-03 21:24:43 +08:00
|
|
|
|
2014-02-12 07:12:08 +08:00
|
|
|
/* Free various types */
|
2020-08-18 09:15:47 +08:00
|
|
|
extern int nc4_type_free(NC_TYPE_INFO_T *type);
|
2010-06-03 21:24:43 +08:00
|
|
|
|
2014-02-12 07:12:08 +08:00
|
|
|
/* These list functions add and delete vars, atts. */
|
2020-08-18 09:15:47 +08:00
|
|
|
extern int nc4_nc4f_list_add(NC *nc, const char *path, int mode);
|
|
|
|
extern int nc4_nc4f_list_del(NC_FILE_INFO_T *h5);
|
|
|
|
extern int nc4_file_list_add(int ncid, const char *path, int mode,
|
2019-08-04 07:19:13 +08:00
|
|
|
void **dispatchdata);
|
2020-08-18 09:15:47 +08:00
|
|
|
extern int nc4_file_list_get(int ncid, char **path, int *mode,
|
2019-08-04 07:19:13 +08:00
|
|
|
void **dispatchdata);
|
2020-08-18 09:15:47 +08:00
|
|
|
extern int nc4_file_list_del(int ncid);
|
|
|
|
extern int nc4_file_change_ncid(int ncid, unsigned short new_ncid_index);
|
|
|
|
extern int nc4_var_list_add(NC_GRP_INFO_T* grp, const char* name, int ndims,
|
2018-11-17 01:07:54 +08:00
|
|
|
NC_VAR_INFO_T **var);
|
2020-08-18 09:15:47 +08:00
|
|
|
extern int nc4_var_list_add2(NC_GRP_INFO_T* grp, const char* name,
|
2018-11-17 01:07:54 +08:00
|
|
|
NC_VAR_INFO_T **var);
|
2020-08-18 09:15:47 +08:00
|
|
|
extern int nc4_var_set_ndims(NC_VAR_INFO_T *var, int ndims);
|
|
|
|
extern int nc4_var_list_del(NC_GRP_INFO_T *grp, NC_VAR_INFO_T *var);
|
|
|
|
extern int nc4_dim_list_add(NC_GRP_INFO_T *grp, const char *name, size_t len,
|
2018-11-17 01:07:54 +08:00
|
|
|
int assignedid, NC_DIM_INFO_T **dim);
|
2020-08-18 09:15:47 +08:00
|
|
|
extern int nc4_dim_list_del(NC_GRP_INFO_T *grp, NC_DIM_INFO_T *dim);
|
|
|
|
extern int nc4_type_new(size_t size, const char *name, int assignedid,
|
2018-11-17 01:07:54 +08:00
|
|
|
NC_TYPE_INFO_T **type);
|
2020-08-18 09:15:47 +08:00
|
|
|
extern int nc4_type_list_add(NC_GRP_INFO_T *grp, size_t size, const char *name,
|
2018-11-17 01:07:54 +08:00
|
|
|
NC_TYPE_INFO_T **type);
|
2020-08-18 09:15:47 +08:00
|
|
|
extern int nc4_type_list_del(NC_GRP_INFO_T *grp, NC_TYPE_INFO_T *type);
|
|
|
|
extern int nc4_type_free(NC_TYPE_INFO_T *type);
|
|
|
|
extern int nc4_field_list_add(NC_TYPE_INFO_T* parent, const char *name,
|
2019-02-19 21:10:30 +08:00
|
|
|
size_t offset, nc_type xtype, int ndims,
|
2018-11-16 23:26:09 +08:00
|
|
|
const int *dim_sizesp);
|
2020-08-18 09:15:47 +08:00
|
|
|
extern int nc4_att_list_add(NCindex *list, const char *name, NC_ATT_INFO_T **att);
|
|
|
|
extern int nc4_att_list_del(NCindex *list, NC_ATT_INFO_T *att);
|
|
|
|
extern int nc4_grp_list_add(NC_FILE_INFO_T *h5, NC_GRP_INFO_T *parent, char *name,
|
2018-11-17 01:07:54 +08:00
|
|
|
NC_GRP_INFO_T **grp);
|
2020-08-18 09:15:47 +08:00
|
|
|
extern int nc4_build_root_grp(NC_FILE_INFO_T *h5);
|
|
|
|
extern int nc4_rec_grp_del(NC_GRP_INFO_T *grp);
|
|
|
|
extern int nc4_enum_member_add(NC_TYPE_INFO_T *type, size_t size, const char *name,
|
2018-11-17 01:07:54 +08:00
|
|
|
const void *value);
|
2020-08-18 09:15:47 +08:00
|
|
|
extern int nc4_att_free(NC_ATT_INFO_T *att);
|
2013-12-01 13:20:28 +08:00
|
|
|
|
2010-06-03 21:24:43 +08:00
|
|
|
/* Check and normalize names. */
|
2020-08-18 09:15:47 +08:00
|
|
|
extern int NC_check_name(const char *name);
|
|
|
|
extern int nc4_check_name(const char *name, char *norm_name);
|
|
|
|
extern int nc4_normalize_name(const char *name, char *norm_name);
|
|
|
|
extern int nc4_check_dup_name(NC_GRP_INFO_T *grp, char *norm_name);
|
2010-06-03 21:24:43 +08:00
|
|
|
|
This PR adds EXPERIMENTAL support for accessing data in the
cloud using a variant of the Zarr protocol and storage
format. This enhancement is generically referred to as "NCZarr".
The data model supported by NCZarr is netcdf-4 minus the user-defined
types and the String type. In this sense it is similar to the CDF-5
data model.
More detailed information about enabling and using NCZarr is
described in the document NUG/nczarr.md and in a
[Unidata Developer's blog entry](https://www.unidata.ucar.edu/blogs/developer/en/entry/overview-of-zarr-support-in).
WARNING: this code has had limited testing, so do not use this version
for production work. Also, performance improvements are ongoing.
Note especially the following platform matrix of successful tests:
Platform | Build System | S3 support
------------------------------------
Linux+gcc | Automake | yes
Linux+gcc | CMake | yes
Visual Studio | CMake | no
Additionally, and as a consequence of the addition of NCZarr,
major changes have been made to the Filter API. NOTE: NCZarr
does not yet support filters, but these changes are enablers for
that support in the future. Note that it is possible
(probable?) that there will be some accidental reversions if the
changes here did not correctly mimic the existing filter testing.
In any case, previously filter ids and parameters were of type
unsigned int. In order to support the more general zarr filter
model, this was all converted to char*. The old HDF5-specific,
unsigned int operations are still supported but they are
wrappers around the new, char* based nc_filterx_XXX functions.
This entailed at least the following changes:
1. Added the files libdispatch/dfilterx.c and include/ncfilter.h
2. Some filterx utilities have been moved to libdispatch/daux.c
3. A new entry, "filter_actions" was added to the NCDispatch table
and the version bumped.
4. An overly complex set of structs was created to support funnelling
all of the filterx operations thru a single dispatch
"filter_actions" entry.
5. Move common code from libhdf5 to libsrc4 so that it is accessible
to nczarr.
Changes directly related to Zarr:
1. Modified CMakeLists.txt and configure.ac to support both C and C++
-- this is in support of S3 access via the aws-sdk libraries.
2. Define a size64_t type to support nczarr.
3. More reworking of libdispatch/dinfermodel.c to
support zarr and to regularize the structure of the fragments
section of a URL.
Changes not directly related to Zarr:
1. Make client-side filter registration be conditional, with default off.
2. Hack include/nc4internal.h to make some flags added by Ed be unique:
e.g. NC_CREAT, NC_INDEF, etc.
3. cleanup include/nchttp.h and libdispatch/dhttp.c.
4. Misc. changes to support compiling under Visual Studio including:
* Better testing under windows for dirent.h and opendir and closedir.
5. Misc. changes to the oc2 code to support various libcurl CURLOPT flags
and to centralize error reporting.
6. By default, suppress the vlen tests that have unfixed memory leaks; add option to enable them.
7. Make part of the nc_test/test_byterange.sh test be contingent on remotetest.unidata.ucar.edu being accessible.
Changes Left TO-DO:
1. fix provenance code, it is too HDF5 specific.
2020-06-29 08:02:47 +08:00
|
|
|
/* Get the fill value for a var. */
|
2020-08-18 09:15:47 +08:00
|
|
|
extern int nc4_get_fill_value(NC_FILE_INFO_T *h5, NC_VAR_INFO_T *var, void **fillp);
|
This PR adds EXPERIMENTAL support for accessing data in the
cloud using a variant of the Zarr protocol and storage
format. This enhancement is generically referred to as "NCZarr".
The data model supported by NCZarr is netcdf-4 minus the user-defined
types and the String type. In this sense it is similar to the CDF-5
data model.
More detailed information about enabling and using NCZarr is
described in the document NUG/nczarr.md and in a
[Unidata Developer's blog entry](https://www.unidata.ucar.edu/blogs/developer/en/entry/overview-of-zarr-support-in).
WARNING: this code has had limited testing, so do not use this version
for production work. Also, performance improvements are ongoing.
Note especially the following platform matrix of successful tests:
Platform | Build System | S3 support
------------------------------------
Linux+gcc | Automake | yes
Linux+gcc | CMake | yes
Visual Studio | CMake | no
Additionally, and as a consequence of the addition of NCZarr,
major changes have been made to the Filter API. NOTE: NCZarr
does not yet support filters, but these changes are enablers for
that support in the future. Note that it is possible
(probable?) that there will be some accidental reversions if the
changes here did not correctly mimic the existing filter testing.
In any case, previously filter ids and parameters were of type
unsigned int. In order to support the more general zarr filter
model, this was all converted to char*. The old HDF5-specific,
unsigned int operations are still supported but they are
wrappers around the new, char* based nc_filterx_XXX functions.
This entailed at least the following changes:
1. Added the files libdispatch/dfilterx.c and include/ncfilter.h
2. Some filterx utilities have been moved to libdispatch/daux.c
3. A new entry, "filter_actions" was added to the NCDispatch table
and the version bumped.
4. An overly complex set of structs was created to support funnelling
all of the filterx operations thru a single dispatch
"filter_actions" entry.
5. Move common code from libhdf5 to libsrc4 so that it is accessible
to nczarr.
Changes directly related to Zarr:
1. Modified CMakeLists.txt and configure.ac to support both C and C++
-- this is in support of S3 access via the aws-sdk libraries.
2. Define a size64_t type to support nczarr.
3. More reworking of libdispatch/dinfermodel.c to
support zarr and to regularize the structure of the fragments
section of a URL.
Changes not directly related to Zarr:
1. Make client-side filter registration be conditional, with default off.
2. Hack include/nc4internal.h to make some flags added by Ed be unique:
e.g. NC_CREAT, NC_INDEF, etc.
3. cleanup include/nchttp.h and libdispatch/dhttp.c.
4. Misc. changes to support compiling under Visual Studio including:
* Better testing under windows for dirent.h and opendir and closedir.
5. Misc. changes to the oc2 code to support various libcurl CURLOPT flags
and to centralize error reporting.
6. By default, suppress the vlen tests that have unfixed memory leaks; add option to enable them.
7. Make part of the nc_test/test_byterange.sh test be contingent on remotetest.unidata.ucar.edu being accessible.
Changes Left TO-DO:
1. fix provenance code, it is too HDF5 specific.
2020-06-29 08:02:47 +08:00
|
|
|
|
2022-01-11 06:27:16 +08:00
|
|
|
/* Find default fill value for atomic type. */
|
|
|
|
extern int nc4_get_default_atomic_fill_value(nc_type, void *fill_value);
|
|
|
|
|
|
|
|
/* Find default fill value for any type */
|
|
|
|
extern int nc4_get_default_fill_value(NC_TYPE_INFO_T*, void *fill_value);
|
2018-07-19 21:23:03 +08:00
|
|
|
|
2018-11-26 23:21:32 +08:00
|
|
|
/* Get an att given pointers to file, group, and perhaps ver info. */
|
2020-08-18 09:15:47 +08:00
|
|
|
extern int nc4_get_att_ptrs(NC_FILE_INFO_T *h5, NC_GRP_INFO_T *grp, NC_VAR_INFO_T *var,
|
2018-11-26 23:21:32 +08:00
|
|
|
const char *name, nc_type *xtype, nc_type mem_type,
|
|
|
|
size_t *lenp, int *attnum, void *data);
|
|
|
|
|
Improve performance of the nc_reclaim_data and nc_copy_data functions.
re: Issue https://github.com/Unidata/netcdf-c/issues/2685
re: PR https://github.com/Unidata/netcdf-c/pull/2179
As noted in PR https://github.com/Unidata/netcdf-c/pull/2179,
the old code did not allow for reclaiming instances of types,
nor for properly copying them. That PR provided new functions
capable of reclaiming/copying instances of arbitrary types.
However, as noted by Issue https://github.com/Unidata/netcdf-c/issues/2685, using these
most general functions resulted in a significant performance
degradation, even for common cases.
This PR attempts to mitigate the cost of using the general
reclaim/copy functions in two ways.
First, the previous functions operated at the top level by
using ncid and typeid arguments. These functions were augmented
with equivalent versions that used the netcdf-c library internal
data structures to allow direct access to needed information.
These new functions are used internally to the library.
The second mitigation involves optimizing the internal functions
by providing early tests for common cases. This avoids
unnecessary recursive function calls.
The overall result is a significant improvement in speed by a
factor of roughly twenty -- your mileage may vary. These
optimized functions are still not as fast as the original (more
limited) functions, but they are getting close. Additional optimizations are
possible. But the cost is a significant "uglification" of the
code that I deemed a step too far, at least for now.
## Misc. Changes
1. Added a test case to check the proper reclamation/copy of complex types.
2. Found and fixed some places where nc_reclaim/copy should have been used.
3. Replaced, in the netcdf-c library, (almost all) occurrences of nc_reclaim_copy with calls to NC_reclaim/copy. This plus the optimizations is the primary speed-up mechanism.
4. In DAP4, the metadata is held in a substrate in-memory file; this required some changes so that the reclaim/copy code accessed that substrate dispatcher rather than the DAP4 dispatcher.
5. Re-factored and isolated the code that computes if a type is (transitively) variable-sized or not.
6. Clean up the reclamation code in ncgen; adding the use of nc_reclaim exposed some memory problems.
2023-05-21 07:11:25 +08:00
|
|
|
/* Get variable/fixed size flag for type (ncid API level)*/
|
Fix various problems around VLENs
re: https://github.com/Unidata/netcdf-c/issues/541
re: https://github.com/Unidata/netcdf-c/issues/1208
re: https://github.com/Unidata/netcdf-c/issues/2078
re: https://github.com/Unidata/netcdf-c/issues/2041
re: https://github.com/Unidata/netcdf-c/issues/2143
For a long time, there have been known problems with the
management of complex types containing VLENs. This also
involves the string type because it is stored as a VLEN of
chars.
This PR (mostly) fixes this problem. But note that it adds new
functions to netcdf.h (see below) and this may require bumping
the .so number. These new functions can be removed, if desired,
in favor of functions in netcdf_aux.h, but netcdf.h seems the
better place for them because they are intended as alternatives
to the nc_free_vlen and nc_free_string functions already in
netcdf.h.
The term complex type refers to any type that directly or
transitively references a VLEN type. So an array of VLENS, a
compound with a VLEN field, and so on.
In order to properly handle instances of these complex types, it
is necessary to have functions that can recursively walk
instances of such types to perform various actions on them. The
term "deep" is also used to mean recursive.
At the moment, the two operations needed by the netcdf library are:
* free'ing an instance of the complex type
* copying an instance of the complex type.
The current library does only shallow free and shallow copy of
complex types. This means that only the top level is properly
free'd or copied, but deep internal blocks in the instance are
not touched.
Note that the term "vector" will be used to mean a contiguous (in
memory) sequence of instances of some type. Given an array with,
say, dimensions 2 X 3 X 4, this will be stored in memory as a
vector of length 2*3*4=24 instances.
The use cases are primarily these.
## nc_get_vars
Suppose one is reading a vector of instances using nc_get_vars
(or nc_get_vara or nc_get_var, etc.). These functions will
return the vector in the top-level memory provided. All
interior blocks (from nested VLENs or strings) will have been
dynamically allocated.
After using this vector of instances, it is necessary to free
(aka reclaim) the dynamically allocated memory, otherwise a
memory leak occurs. So, the recursive reclaim function is used
to walk the returned instance vector and do a deep reclaim of
the data.
Currently functions are defined in netcdf.h that are supposed to
handle this: nc_free_vlen(), nc_free_vlens(), and
nc_free_string(). Unfortunately, these functions only do a
shallow free, so deeply nested instances are not properly
handled by them.
Note that internally, the provided data is immediately written so
there is no need to copy it. But the caller may need to reclaim the
data it passed into the function.
## nc_put_att
Suppose one is writing a vector of instances as the data of an attribute
using, say, nc_put_att.
Internally, the incoming attribute data must be copied and stored
so that changes/reclamation of the input data will not affect
the attribute.
Again, the code inside the netcdf library does only shallow copying
rather than deep copy. As a result, one sees effects such as described
in Github Issue https://github.com/Unidata/netcdf-c/issues/2143.
Also, after defining the attribute, it may be necessary for the user
to free the data that was provided as input to nc_put_att().
## nc_get_att
Suppose one is reading a vector of instances as the data of an attribute
using, say, nc_get_att.
Internally, the existing attribute data must be copied and returned
to the caller, and the caller is responsible for reclaiming
the returned data.
Again, the code inside the netcdf library does only shallow copying
rather than deep copy. So this can lead to memory leaks and errors
because the deep data is shared between the library and the user.
# Solution
The solution is to build properly recursive reclaim and copy
functions and use those as needed.
These recursive functions are defined in libdispatch/dinstance.c
and their signatures are defined in include/netcdf.h.
For back compatibility, corresponding "ncaux_XXX" functions
are defined in include/netcdf_aux.h.
````
int nc_reclaim_data(int ncid, nc_type xtypeid, void* memory, size_t count);
int nc_reclaim_data_all(int ncid, nc_type xtypeid, void* memory, size_t count);
int nc_copy_data(int ncid, nc_type xtypeid, const void* memory, size_t count, void* copy);
int nc_copy_data_all(int ncid, nc_type xtypeid, const void* memory, size_t count, void** copyp);
````
There are two variants. The first two, nc_reclaim_data() and
nc_copy_data(), assume the top-level vector is managed by the
caller. For reclaim, this is so the user can use, for example, a
statically allocated vector. For copy, it assumes the user
provides the space into which the copy is stored.
The second two, nc_reclaim_data_all() and
nc_copy_data_all(), allow the functions to manage the
top-level. So for nc_reclaim_data_all, the top level is
assumed to be dynamically allocated and will be free'd by
nc_reclaim_data_all(). The nc_copy_data_all() function
will allocate the top level and return a pointer to it to the
user. The user can later pass that pointer to
nc_reclaim_data_all() to reclaim the instance(s).
# Internal Changes
The netcdf-c library internals are changed to use the proper
reclaim and copy functions. It turns out that the places where
these functions are needed are quite pervasive in the netcdf-c
library code. Using these functions also allows some
simplification of the code since the stdata and vldata fields of
NC_ATT_INFO are no longer needed. Currently this is commented
out using the SEPDATA \#define macro. When any bugs are largely
fixed, all this code will be removed.
# Known Bugs
1. There is still one known failure that has not been solved.
All the failures revolve around some variant of this .cdl file.
The proximate cause of failure is the use of a VLEN FillValue.
````
netcdf x {
types:
float(*) row_of_floats ;
dimensions:
m = 5 ;
variables:
row_of_floats ragged_array(m) ;
row_of_floats ragged_array:_FillValue = {-999} ;
data:
ragged_array = {10, 11, 12, 13, 14}, {20, 21, 22, 23}, {30, 31, 32},
{40, 41}, _ ;
}
````
When a solution is found, I will either add it to this PR or post a new PR.
# Related Changes
* Mark nc_free_vlen(s) as deprecated in favor of ncaux_reclaim_data.
* Remove the --enable-unfixed-memory-leaks option.
* Remove the NC_VLENS_NOTEST code that suppresses some vlen tests.
* Document this change in docs/internal.md
* Disable the tst_vlen_data test in ncdump/tst_nccopy4.sh.
* Mark types as fixed size or not (transitively) to optimize the reclaim
and copy functions.
# Misc. Changes
* Make Doxygen process libdispatch/daux.c
* Make sure the NC_ATT_INFO_T.container field is set.
2022-01-09 09:30:00 +08:00
|
|
|
/* Get the variable/fixed size flag for a type, addressed through the
   ncid-level API (ncid plus type id).
   NOTE(review): isfixedsizep looks like an out-parameter receiving the
   flag -- confirm against the definition. */
extern int NC4_inq_type_fixed_size(int ncid, nc_type xtype, int* isfixedsizep);
|
Improve performance of the nc_reclaim_data and nc_copy_data functions.
re: Issue https://github.com/Unidata/netcdf-c/issues/2685
re: PR https://github.com/Unidata/netcdf-c/pull/2179
As noted in PR https://github.com/Unidata/netcdf-c/pull/2179,
the old code did not allow for reclaiming instances of types,
nor for properly copying them. That PR provided new functions
capable of reclaiming/copying instances of arbitrary types.
However, as noted by Issue https://github.com/Unidata/netcdf-c/issues/2685, using these
most general functions resulted in a significant performance
degradation, even for common cases.
This PR attempts to mitigate the cost of using the general
reclaim/copy functions in two ways.
First, the previous functions operated at the top level by
using ncid and typeid arguments. These functions were augmented
with equivalent versions that used the netcdf-c library internal
data structures to allow direct access to needed information.
These new functions are used internally to the library.
The second mitigation involves optimizing the internal functions
by providing early tests for common cases. This avoids
unnecessary recursive function calls.
The overall result is a significant improvement in speed by a
factor of roughly twenty -- your mileage may vary. These
optimized functions are still not as fast as the original (more
limited) functions, but they are getting close. Additional optimizations are
possible. But the cost is a significant "uglification" of the
code that I deemed a step too far, at least for now.
## Misc. Changes
1. Added a test case to check the proper reclamation/copy of complex types.
2. Found and fixed some places where nc_reclaim/copy should have been used.
3. Replaced, in the netcdf-c library, (almost all) occurrences of nc_reclaim/copy with calls to NC_reclaim/copy. This plus the optimizations is the primary speed-up mechanism.
4. In DAP4, the metadata is held in a substrate in-memory file; this required some changes so that the reclaim/copy code accessed that substrate dispatcher rather than the DAP4 dispatcher.
5. Re-factored and isolated the code that computes if a type is (transitively) variable-sized or not.
6. Clean up the reclamation code in ncgen; adding the use of nc_reclaim exposed some memory problems.
2023-05-21 07:11:25 +08:00
|
|
|
/* Manage the fixed/variable sizedness of a type.
   NOTE(review): going by the names, NC4_recheck_varsize presumably updates
   parenttype's variable-sized status after addedtype has been added to it,
   and NC4_set_varsize presumably computes/marks that status for parenttype;
   confirm against the definitions. */
|
|
|
|
extern int NC4_recheck_varsize(NC_TYPE_INFO_T* parenttype, nc_type addedtype);
|
|
|
|
extern int NC4_set_varsize(NC_TYPE_INFO_T* parenttype);
|
2023-06-22 04:46:22 +08:00
|
|
|
/* NOTE(review): presumably reports whether the type of var is variable-sized;
   confirm the return convention against the definition. */
extern int NC4_var_varsized(NC_VAR_INFO_T* var);
|
Fix various problems around VLENs
re: https://github.com/Unidata/netcdf-c/issues/541
re: https://github.com/Unidata/netcdf-c/issues/1208
re: https://github.com/Unidata/netcdf-c/issues/2078
re: https://github.com/Unidata/netcdf-c/issues/2041
re: https://github.com/Unidata/netcdf-c/issues/2143
For a long time, there have been known problems with the
management of complex types containing VLENs. This also
involves the string type because it is stored as a VLEN of
chars.
This PR (mostly) fixes this problem. But note that it adds new
functions to netcdf.h (see below) and this may require bumping
the .so number. These new functions can be removed, if desired,
in favor of functions in netcdf_aux.h, but netcdf.h seems the
better place for them because they are intended as alternatives
to the nc_free_vlen and nc_free_string functions already in
netcdf.h.
The term complex type refers to any type that directly or
transitively references a VLEN type. So an array of VLENS, a
compound with a VLEN field, and so on.
In order to properly handle instances of these complex types, it
is necessary to have functions that can recursively walk
instances of such types to perform various actions on them. The
term "deep" is also used to mean recursive.
At the moment, the two operations needed by the netcdf library are:
* free'ing an instance of the complex type
* copying an instance of the complex type.
The current library does only shallow free and shallow copy of
complex types. This means that only the top level is properly
free'd or copied, but deep internal blocks in the instance are
not touched.
Note that the term "vector" will be used to mean a contiguous (in
memory) sequence of instances of some type. Given an array with,
say, dimensions 2 X 3 X 4, this will be stored in memory as a
vector of length 2*3*4=24 instances.
The use cases are primarily these.
## nc_get_vars
Suppose one is reading a vector of instances using nc_get_vars
(or nc_get_vara or nc_get_var, etc.). These functions will
return the vector in the top-level memory provided. All
interior blocks (from nested VLENs or strings) will have been
dynamically allocated.
After using this vector of instances, it is necessary to free
(aka reclaim) the dynamically allocated memory, otherwise a
memory leak occurs. So, the recursive reclaim function is used
to walk the returned instance vector and do a deep reclaim of
the data.
Currently functions are defined in netcdf.h that are supposed to
handle this: nc_free_vlen(), nc_free_vlens(), and
nc_free_string(). Unfortunately, these functions only do a
shallow free, so deeply nested instances are not properly
handled by them.
Note that internally, the provided data is immediately written so
there is no need to copy it. But the caller may need to reclaim the
data it passed into the function.
## nc_put_att
Suppose one is writing a vector of instances as the data of an attribute
using, say, nc_put_att.
Internally, the incoming attribute data must be copied and stored
so that changes/reclamation of the input data will not affect
the attribute.
Again, the code inside the netcdf library does only shallow copying
rather than deep copy. As a result, one sees effects such as described
in Github Issue https://github.com/Unidata/netcdf-c/issues/2143.
Also, after defining the attribute, it may be necessary for the user
to free the data that was provided as input to nc_put_att().
## nc_get_att
Suppose one is reading a vector of instances as the data of an attribute
using, say, nc_get_att.
Internally, the existing attribute data must be copied and returned
to the caller, and the caller is responsible for reclaiming
the returned data.
Again, the code inside the netcdf library does only shallow copying
rather than deep copy. So this can lead to memory leaks and errors
because the deep data is shared between the library and the user.
# Solution
The solution is to build properly recursive reclaim and copy
functions and use those as needed.
These recursive functions are defined in libdispatch/dinstance.c
and their signatures are defined in include/netcdf.h.
For back compatibility, corresponding "ncaux_XXX" functions
are defined in include/netcdf_aux.h.
````
int nc_reclaim_data(int ncid, nc_type xtypeid, void* memory, size_t count);
int nc_reclaim_data_all(int ncid, nc_type xtypeid, void* memory, size_t count);
int nc_copy_data(int ncid, nc_type xtypeid, const void* memory, size_t count, void* copy);
int nc_copy_data_all(int ncid, nc_type xtypeid, const void* memory, size_t count, void** copyp);
````
There are two variants. The first two, nc_reclaim_data() and
nc_copy_data(), assume the top-level vector is managed by the
caller. For reclaim, this is so the user can use, for example, a
statically allocated vector. For copy, it assumes the user
provides the space into which the copy is stored.
The second two, nc_reclaim_data_all() and
nc_copy_data_all(), allow the functions to manage the
top-level. So for nc_reclaim_data_all, the top level is
assumed to be dynamically allocated and will be free'd by
nc_reclaim_data_all(). The nc_copy_data_all() function
will allocate the top level and return a pointer to it to the
user. The user can later pass that pointer to
nc_reclaim_data_all() to reclaim the instance(s).
# Internal Changes
The netcdf-c library internals are changed to use the proper
reclaim and copy functions. It turns out that the places where
these functions are needed are quite pervasive in the netcdf-c
library code. Using these functions also allows some
simplification of the code since the stdata and vldata fields of
NC_ATT_INFO are no longer needed. Currently this is commented
out using the SEPDATA \#define macro. When any bugs are largely
fixed, all this code will be removed.
# Known Bugs
1. There is still one known failure that has not been solved.
All the failures revolve around some variant of this .cdl file.
The proximate cause of failure is the use of a VLEN FillValue.
````
netcdf x {
types:
float(*) row_of_floats ;
dimensions:
m = 5 ;
variables:
row_of_floats ragged_array(m) ;
row_of_floats ragged_array:_FillValue = {-999} ;
data:
ragged_array = {10, 11, 12, 13, 14}, {20, 21, 22, 23}, {30, 31, 32},
{40, 41}, _ ;
}
````
When a solution is found, I will either add it to this PR or post a new PR.
# Related Changes
* Mark nc_free_vlen(s) as deprecated in favor of ncaux_reclaim_data.
* Remove the --enable-unfixed-memory-leaks option.
* Remove the NC_VLENS_NOTEST code that suppresses some vlen tests.
* Document this change in docs/internal.md
* Disable the tst_vlen_data test in ncdump/tst_nccopy4.sh.
* Mark types as fixed size or not (transitively) to optimize the reclaim
and copy functions.
# Misc. Changes
* Make Doxygen process libdispatch/daux.c
* Make sure the NC_ATT_INFO_T.container field is set.
2022-01-09 09:30:00 +08:00
|
|
|
|
2018-07-17 22:00:47 +08:00
|
|
|
/* Close the file described by h5.
   NOTE(review): abort presumably asks for the file to be closed without
   keeping pending changes, and memio presumably receives the final memory
   image for in-memory files -- confirm against the definition. */
|
2020-08-18 09:15:47 +08:00
|
|
|
extern int nc4_close_netcdf4_file(NC_FILE_INFO_T *h5, int abort, NC_memio *memio);
|
2018-07-17 22:00:47 +08:00
|
|
|
|
2020-09-28 02:43:46 +08:00
|
|
|
/* Compute default chunksizes for variable var in group grp
   (nc4_find_default_chunksizes2).
   NOTE(review): nc4_check_chunksizes presumably validates the
   caller-supplied chunksizes array for var; confirm what it checks and how
   many entries it reads against the definition. */
|
|
|
|
extern int nc4_find_default_chunksizes2(NC_GRP_INFO_T *grp, NC_VAR_INFO_T *var);
|
|
|
|
extern int nc4_check_chunksizes(NC_GRP_INFO_T* grp, NC_VAR_INFO_T* var, const size_t* chunksizes);
|
|
|
|
|
2018-10-31 10:48:12 +08:00
|
|
|
/* HDF5 initialization/finalization.
   All three symbols are only declared here; they are defined elsewhere.
   NOTE(review): nc4_hdf5_initialized is presumably nonzero once
   nc4_hdf5_initialize() has run -- confirm against the definition. */
|
2016-05-04 11:17:06 +08:00
|
|
|
extern int nc4_hdf5_initialized;
|
|
|
|
extern void nc4_hdf5_initialize(void);
|
2018-10-31 10:48:12 +08:00
|
|
|
extern void nc4_hdf5_finalize(void);
|
2016-01-29 06:03:40 +08:00
|
|
|
|
2010-06-03 21:24:43 +08:00
|
|
|
/* This is only included if --enable-logging is used for configure; it
|
|
|
|
prints info about the metadata to stderr. */
|
|
|
|
#ifdef LOGGING
|
2020-08-18 09:15:47 +08:00
|
|
|
/* Declared only when LOGGING is defined; takes the file's NC_FILE_INFO_T. */
extern int log_metadata_nc(NC_FILE_INFO_T *h5);
|
2010-06-03 21:24:43 +08:00
|
|
|
#endif /* LOGGING */
|
|
|
|
|
This PR adds EXPERIMENTAL support for accessing data in the
cloud using a variant of the Zarr protocol and storage
format. This enhancement is generically referred to as "NCZarr".
The data model supported by NCZarr is netcdf-4 minus the user-defined
types and the String type. In this sense it is similar to the CDF-5
data model.
More detailed information about enabling and using NCZarr is
described in the document NUG/nczarr.md and in a
[Unidata Developer's blog entry](https://www.unidata.ucar.edu/blogs/developer/en/entry/overview-of-zarr-support-in).
WARNING: this code has had limited testing, so do not use this version
for production work. Also, performance improvements are ongoing.
Note especially the following platform matrix of successful tests:
Platform | Build System | S3 support
------------------------------------
Linux+gcc | Automake | yes
Linux+gcc | CMake | yes
Visual Studio | CMake | no
Additionally, and as a consequence of the addition of NCZarr,
major changes have been made to the Filter API. NOTE: NCZarr
does not yet support filters, but these changes are enablers for
that support in the future. Note that it is possible
(probable?) that there will be some accidental reversions if the
changes here did not correctly mimic the existing filter testing.
In any case, previously filter ids and parameters were of type
unsigned int. In order to support the more general zarr filter
model, this was all converted to char*. The old HDF5-specific,
unsigned int operations are still supported but they are
wrappers around the new, char* based nc_filterx_XXX functions.
This entailed at least the following changes:
1. Added the files libdispatch/dfilterx.c and include/ncfilter.h
2. Some filterx utilities have been moved to libdispatch/daux.c
3. A new entry, "filter_actions" was added to the NCDispatch table
and the version bumped.
4. An overly complex set of structs was created to support funnelling
all of the filterx operations thru a single dispatch
"filter_actions" entry.
5. Move common code from libhdf5 to libsrc4 so that it is accessible
to nczarr.
Changes directly related to Zarr:
1. Modified CMakeLists.txt and configure.ac to support both C and C++
-- this is in support of S3 support via the aws-sdk libraries.
2. Define a size64_t type to support nczarr.
3. More reworking of libdispatch/dinfermodel.c to
support zarr and to regularize the structure of the fragments
section of a URL.
Changes not directly related to Zarr:
1. Make client-side filter registration be conditional, with default off.
2. Hack include/nc4internal.h to make some flags added by Ed be unique:
e.g. NC_CREAT, NC_INDEF, etc.
3. cleanup include/nchttp.h and libdispatch/dhttp.c.
4. Misc. changes to support compiling under Visual Studio including:
* Better testing under windows for dirent.h and opendir and closedir.
5. Misc. changes to the oc2 code to support various libcurl CURLOPT flags
and to centralize error reporting.
6. By default, suppress the vlen tests that have unfixed memory leaks; add option to enable them.
7. Make part of the nc_test/test_byterange.sh test be contingent on remotetest.unidata.ucar.edu being accessible.
Changes Left TO-DO:
1. fix provenance code, it is too HDF5 specific.
2020-06-29 08:02:47 +08:00
|
|
|
/** @internal Names of atomic types: an array of NUM_ATOMIC_TYPES
 * constant strings, defined elsewhere. */
|
|
|
|
extern const char* nc4_atomic_name[NUM_ATOMIC_TYPES];
|
2020-02-17 03:59:33 +08:00
|
|
|
|
This PR adds EXPERIMENTAL support for accessing data in the
cloud using a variant of the Zarr protocol and storage
format. This enhancement is generically referred to as "NCZarr".
The data model supported by NCZarr is netcdf-4 minus the user-defined
types and the String type. In this sense it is similar to the CDF-5
data model.
More detailed information about enabling and using NCZarr is
described in the document NUG/nczarr.md and in a
[Unidata Developer's blog entry](https://www.unidata.ucar.edu/blogs/developer/en/entry/overview-of-zarr-support-in).
WARNING: this code has had limited testing, so do not use this version
for production work. Also, performance improvements are ongoing.
Note especially the following platform matrix of successful tests:
Platform | Build System | S3 support
------------------------------------
Linux+gcc | Automake | yes
Linux+gcc | CMake | yes
Visual Studio | CMake | no
Additionally, and as a consequence of the addition of NCZarr,
major changes have been made to the Filter API. NOTE: NCZarr
does not yet support filters, but these changes are enablers for
that support in the future. Note that it is possible
(probable?) that there will be some accidental reversions if the
changes here did not correctly mimic the existing filter testing.
In any case, previously filter ids and parameters were of type
unsigned int. In order to support the more general zarr filter
model, this was all converted to char*. The old HDF5-specific,
unsigned int operations are still supported but they are
wrappers around the new, char* based nc_filterx_XXX functions.
This entailed at least the following changes:
1. Added the files libdispatch/dfilterx.c and include/ncfilter.h
2. Some filterx utilities have been moved to libdispatch/daux.c
3. A new entry, "filter_actions" was added to the NCDispatch table
and the version bumped.
4. An overly complex set of structs was created to support funnelling
all of the filterx operations thru a single dispatch
"filter_actions" entry.
5. Move common code from libhdf5 to libsrc4 so that it is accessible
to nczarr.
Changes directly related to Zarr:
1. Modified CMakeLists.txt and configure.ac to support both C and C++
-- this is in support of S3 support via the aws-sdk libraries.
2. Define a size64_t type to support nczarr.
3. More reworking of libdispatch/dinfermodel.c to
support zarr and to regularize the structure of the fragments
section of a URL.
Changes not directly related to Zarr:
1. Make client-side filter registration be conditional, with default off.
2. Hack include/nc4internal.h to make some flags added by Ed be unique:
e.g. NC_CREAT, NC_INDEF, etc.
3. cleanup include/nchttp.h and libdispatch/dhttp.c.
4. Misc. changes to support compiling under Visual Studio including:
* Better testing under windows for dirent.h and opendir and closedir.
5. Misc. changes to the oc2 code to support various libcurl CURLOPT flags
and to centralize error reporting.
6. By default, suppress the vlen tests that have unfixed memory leaks; add option to enable them.
7. Make part of the nc_test/test_byterange.sh test be contingent on remotetest.unidata.ucar.edu being accessible.
Changes Left TO-DO:
1. fix provenance code, it is too HDF5 specific.
2020-06-29 08:02:47 +08:00
|
|
|
/* Binary searcher for reserved attributes: looks up name and returns a
   pointer to the matching NC_reservedatt entry.
   NOTE(review): presumably returns NULL when name is not a reserved
   attribute -- confirm against the definition. */
|
|
|
|
extern const NC_reservedatt* NC_findreserved(const char* name);
|
2020-02-17 03:59:33 +08:00
|
|
|
|
2022-01-30 06:27:52 +08:00
|
|
|
/* Global State Management.
   NC_getglobalstate returns a pointer to an NCglobalstate object;
   NC_freeglobalstate takes no arguments and returns nothing.
   NOTE(review): presumably there is a single library-wide state object that
   is created on first use and released by NC_freeglobalstate -- confirm
   against the definitions. */
|
|
|
|
extern NCglobalstate* NC_getglobalstate(void);
|
|
|
|
extern void NC_freeglobalstate(void);
|
|
|
|
|
This PR adds EXPERIMENTAL support for accessing data in the
cloud using a variant of the Zarr protocol and storage
format. This enhancement is generically referred to as "NCZarr".
The data model supported by NCZarr is netcdf-4 minus the user-defined
types and the String type. In this sense it is similar to the CDF-5
data model.
More detailed information about enabling and using NCZarr is
described in the document NUG/nczarr.md and in a
[Unidata Developer's blog entry](https://www.unidata.ucar.edu/blogs/developer/en/entry/overview-of-zarr-support-in).
WARNING: this code has had limited testing, so do not use this version
for production work. Also, performance improvements are ongoing.
Note especially the following platform matrix of successful tests:
Platform | Build System | S3 support
------------------------------------
Linux+gcc | Automake | yes
Linux+gcc | CMake | yes
Visual Studio | CMake | no
Additionally, and as a consequence of the addition of NCZarr,
major changes have been made to the Filter API. NOTE: NCZarr
does not yet support filters, but these changes are enablers for
that support in the future. Note that it is possible
(probable?) that there will be some accidental reversions if the
changes here did not correctly mimic the existing filter testing.
In any case, previously filter ids and parameters were of type
unsigned int. In order to support the more general zarr filter
model, this was all converted to char*. The old HDF5-specific,
unsigned int operations are still supported but they are
wrappers around the new, char* based nc_filterx_XXX functions.
This entailed at least the following changes:
1. Added the files libdispatch/dfilterx.c and include/ncfilter.h
2. Some filterx utilities have been moved to libdispatch/daux.c
3. A new entry, "filter_actions" was added to the NCDispatch table
and the version bumped.
4. An overly complex set of structs was created to support funnelling
all of the filterx operations thru a single dispatch
"filter_actions" entry.
5. Move common code from libhdf5 to libsrc4 so that it is accessible
to nczarr.
Changes directly related to Zarr:
1. Modified CMakeLists.txt and configure.ac to support both C and C++
-- this is in support of S3 support via the aws-sdk libraries.
2. Define a size64_t type to support nczarr.
3. More reworking of libdispatch/dinfermodel.c to
support zarr and to regularize the structure of the fragments
section of a URL.
Changes not directly related to Zarr:
1. Make client-side filter registration be conditional, with default off.
2. Hack include/nc4internal.h to make some flags added by Ed be unique:
e.g. NC_CREAT, NC_INDEF, etc.
3. cleanup include/nchttp.h and libdispatch/dhttp.c.
4. Misc. changes to support compiling under Visual Studio including:
* Better testing under windows for dirent.h and opendir and closedir.
5. Misc. changes to the oc2 code to support various libcurl CURLOPT flags
and to centralize error reporting.
6. By default, suppress the vlen tests that have unfixed memory leaks; add option to enable them.
7. Make part of the nc_test/test_byterange.sh test be contingent on remotetest.unidata.ucar.edu being accessible.
Changes Left TO-DO:
1. fix provenance code, it is too HDF5 specific.
2020-06-29 08:02:47 +08:00
|
|
|
/* Generic reserved attributes: each macro below expands to the literal
   name string of one reserved attribute. */
|
|
|
|
#define NC_ATT_REFERENCE_LIST "REFERENCE_LIST"
|
|
|
|
#define NC_ATT_CLASS "CLASS"
|
|
|
|
#define NC_ATT_DIMENSION_LIST "DIMENSION_LIST"
|
|
|
|
#define NC_ATT_NAME "NAME"
|
|
|
|
#define NC_ATT_COORDINATES "_Netcdf4Coordinates" /* see hdf5internal.h:COORDINATES */
|
|
|
|
#define NC_ATT_FORMAT "_Format"
|
|
|
|
#define NC_ATT_DIMID_NAME "_Netcdf4Dimid"
|
2022-08-28 10:21:13 +08:00
|
|
|
/* Name of the fill-value attribute (written as var:_FillValue in CDL). */
#define NC_ATT_FILLVALUE "_FillValue"
|
This PR adds EXPERIMENTAL support for accessing data in the
cloud using a variant of the Zarr protocol and storage
format. This enhancement is generically referred to as "NCZarr".
The data model supported by NCZarr is netcdf-4 minus the user-defined
types and the String type. In this sense it is similar to the CDF-5
data model.
More detailed information about enabling and using NCZarr is
described in the document NUG/nczarr.md and in a
[Unidata Developer's blog entry](https://www.unidata.ucar.edu/blogs/developer/en/entry/overview-of-zarr-support-in).
WARNING: this code has had limited testing, so do not use this version
for production work. Also, performance improvements are ongoing.
Note especially the following platform matrix of successful tests:
Platform | Build System | S3 support
------------------------------------
Linux+gcc | Automake | yes
Linux+gcc | CMake | yes
Visual Studio | CMake | no
Additionally, and as a consequence of the addition of NCZarr,
major changes have been made to the Filter API. NOTE: NCZarr
does not yet support filters, but these changes are enablers for
that support in the future. Note that it is possible
(probable?) that there will be some accidental reversions if the
changes here did not correctly mimic the existing filter testing.
In any case, previously filter ids and parameters were of type
unsigned int. In order to support the more general zarr filter
model, this was all converted to char*. The old HDF5-specific,
unsigned int operations are still supported but they are
wrappers around the new, char* based nc_filterx_XXX functions.
This entailed at least the following changes:
1. Added the files libdispatch/dfilterx.c and include/ncfilter.h
2. Some filterx utilities have been moved to libdispatch/daux.c
3. A new entry, "filter_actions" was added to the NCDispatch table
and the version bumped.
4. An overly complex set of structs was created to support funnelling
all of the filterx operations thru a single dispatch
"filter_actions" entry.
5. Move common code from libhdf5 to libsrc4 so that it is accessible
to nczarr.
Changes directly related to Zarr:
1. Modified CMakeLists.txt and configure.ac to support both C and C++
-- this is in support of S3 support via the aws-sdk libraries.
2. Define a size64_t type to support nczarr.
3. More reworking of libdispatch/dinfermodel.c to
support zarr and to regularize the structure of the fragments
section of a URL.
Changes not directly related to Zarr:
1. Make client-side filter registration be conditional, with default off.
2. Hack include/nc4internal.h to make some flags added by Ed be unique:
e.g. NC_CREAT, NC_INDEF, etc.
3. cleanup include/nchttp.h and libdispatch/dhttp.c.
4. Misc. changes to support compiling under Visual Studio including:
* Better testing under windows for dirent.h and opendir and closedir.
5. Misc. changes to the oc2 code to support various libcurl CURLOPT flags
and to centralize error reporting.
6. By default, suppress the vlen tests that have unfixed memory leaks; add option to enable them.
7. Make part of the nc_test/test_byterange.sh test be contingent on remotetest.unidata.ucar.edu being accessible.
Changes Left TO-DO:
1. fix provenance code, it is too HDF5 specific.
2020-06-29 08:02:47 +08:00
|
|
|
/* NOTE(review): presumably the attribute marking a file as restricted to the
   netCDF-3 (classic) data model -- confirm. */
#define NC_ATT_NC3_STRICT_NAME "_nc3_strict"
|
2021-02-25 04:46:11 +08:00
|
|
|
/* NOTE(review): presumably the attribute xarray uses to name an array's
   dimensions -- confirm. */
#define NC_XARRAY_DIMS "_ARRAY_DIMENSIONS"
|
2021-09-03 07:04:26 +08:00
|
|
|
/* NOTE(review): presumably the attribute describing a variable's filter
   codecs -- confirm. */
#define NC_ATT_CODECS "_Codecs"
|
2022-08-28 10:21:13 +08:00
|
|
|
/* NCZarr-related attribute names; each macro expands to the literal name. */
#define NC_NCZARR_ATTR "_nczarr_attr"
|
|
|
|
/* Upper-case spelling of NC_NCZARR_ATTR. */
#define NC_NCZARR_ATTR_UC "_NCZARR_ATTR"
|
|
|
|
/* NOTE(review): presumably names the attribute holding a maximum string
   length -- confirm scope (per-variable?) against the NCZarr code. */
#define NC_NCZARR_MAXSTRLEN_ATTR "_nczarr_maxstrlen"
|
|
|
|
/* NOTE(review): presumably names the attribute holding the default maximum
   string length -- confirm against the NCZarr code. */
#define NC_NCZARR_DEFAULT_MAXSTRLEN_ATTR "_nczarr_default_maxstrlen"
|
2020-08-18 09:15:47 +08:00
|
|
|
|
2018-07-12 21:05:21 +08:00
|
|
|
#endif /* _NC4INTERNAL_ */
|