#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <stddef.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif

#ifdef HAVE_GETOPT_H
#include <getopt.h>
#endif

#if defined(_WIN32) && !defined(__MINGW32__)
#include "XGetopt.h"
#endif

#include "netcdf.h"
#include "ncpathmgr.h"
#include "nclog.h"

#ifdef USE_HDF5
#include <hdf5.h>
#include <H5DSpublic.h>
#endif

#ifdef NETCDF_ENABLE_NCZARR
#include "zincludes.h"
#endif

#undef DEBUG

/* Short Aliases */
#ifdef USE_HDF5
#define H5
#endif
#ifdef NETCDF_ENABLE_NCZARR
#define NZ
#endif
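
/* Command-line options plus the dimension/chunking metadata of the
   variable being dumped. */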
typedef struct Format {
    int format;
    char file_name[NC_MAX_NAME];
    char var_name[NC_MAX_NAME];
    int fillvalue;                       /* cells equal to this print as "_" */
    int debug;
    int linear;
    int holevalue;
    size_t rank;                         /* number of dimensions */
    size_t dimlens[NC_MAX_VAR_DIMS];     /* length of each dimension */
    size_t chunklens[NC_MAX_VAR_DIMS];   /* chunk length along each dimension */
    size_t chunkcounts[NC_MAX_VAR_DIMS]; /* number of chunks along each dimension */
    size_t chunkprod;                    /* product of chunklens: values per chunk */
    size_t dimprod;                      /* product of dimlens: values in the variable */
    nc_type xtype;                       /* netCDF type of the variable */
} Format;
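
/* A multi-dimensional counter used to enumerate chunk coordinates in
   row-major order (rightmost index varies fastest). */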
typedef struct Odometer {
    size_t rank;                   /* rank */
    size_t start[NC_MAX_VAR_DIMS];
    size_t stop[NC_MAX_VAR_DIMS];
    size_t max[NC_MAX_VAR_DIMS];   /* max size of ith index */
    size_t index[NC_MAX_VAR_DIMS]; /* current value of the odometer */
} Odometer;
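
/* Integer division helpers; ceildiv rounds up and is handy for
   computing how many chunks cover a dimension of a given length. */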
#define floordiv(x,y) ((x) / (y))
#define ceildiv(x,y) (((x) % (y)) == 0 ? ((x) / (y)) : (((x) / (y)) + 1))
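
/* Strings built by printvector() are recorded here so cleanup() can
   free them at exit. */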
static char* captured[4096];
static size_t ncap = 0;

extern int nc__testurl(const char*,char**);

Odometer* odom_new(size_t rank, const size_t* stop, const size_t* max);
void odom_free(Odometer* odom);
int odom_more(Odometer* odom);
int odom_next(Odometer* odom);
size_t* odom_indices(Odometer* odom);
size_t odom_offset(Odometer* odom);
const char* odom_print(Odometer* odom);
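
/* A minimal sketch (never compiled) of how the API above fits together:
   an Odometer enumerates chunk coordinates and setoffset() converts each
   coordinate into that chunk's element-space origin. The function name
   `walkchunks` is illustrative only; the real read-and-print logic is
   format-specific and appears elsewhere in this program. */
#if 0
static void
walkchunks(Format* format)
{
    size_t offset[NC_MAX_VAR_DIMS];
    Odometer* odom = odom_new(format->rank, format->chunkcounts, format->chunkcounts);
    if(odom == NULL) return;
    while(odom_more(odom)) {
        setoffset(odom, format->chunklens, offset); /* chunk origin in elements */
        /* ...read the chunk at `offset` and display it, e.g. via printchunk()... */
        odom_next(odom);
    }
    odom_free(odom);
}
#endif
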
static void
usage(int err)
{
    if(err != 0) {
        fprintf(stderr,"Error: (%d) %s\n",err,nc_strerror(err));
    }
    fprintf(stderr,"usage: ncdumpchunks [-b] -v <var> <file> \n");
    fflush(stderr);
    exit(1);
}
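
/* Render a size_t vector as a comma-separated string; the result is
   strdup'd and tracked in captured[] so cleanup() can free it. */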
const char*
printvector(int rank, size_t* vec)
{
    char svec[NC_MAX_VAR_DIMS*3+1];
    int i;
    svec[0] = '\0';
    for(i=0;i<rank;i++) {
        char s[3+1];
        if(i > 0) strlcat(svec,",",sizeof(svec));
        snprintf(s,sizeof(s),"%u",(unsigned)vec[i]);
        strlcat(svec,s,sizeof(svec));
    }
    captured[ncap++] = strdup(svec);
    return captured[ncap-1];
}
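
/* Free all strings captured by printvector(). */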
void
cleanup(void)
{
    size_t i;
    for(i=0;i<ncap;i++)
        if(captured[i]) free(captured[i]);
}
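
/* Allocate an odometer covering indices 0 (inclusive) to stop[i]
   (exclusive) in each position; max[i] is the full extent of axis i,
   used when linearizing offsets. */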
Odometer*
odom_new(size_t rank, const size_t* stop, const size_t* max)
{
    size_t i;
    Odometer* odom = NULL;
    if((odom = calloc(1,sizeof(Odometer))) == NULL)
        return NULL;
    odom->rank = rank;
    for(i=0;i<rank;i++) {
        odom->start[i] = 0;
        odom->stop[i] = stop[i];
        odom->max[i] = max[i];
        odom->index[i] = 0;
    }
    return odom;
}
void
odom_free(Odometer* odom)
{
    if(odom) free(odom);
}
int
odom_more(Odometer* odom)
{
    return (odom->index[0] < odom->stop[0]);
}
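
/* Advance the odometer one step, rightmost index fastest; returns 0
   once the leftmost index would overflow, else 1. */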
int
odom_next(Odometer* odom)
{
    int i;
    for(i=(int)odom->rank-1;i>=0;i--) {
        odom->index[i]++;
        if(odom->index[i] < odom->stop[i]) break;
        if(i == 0) return 0; /* leave the 0th entry if it overflows */
        odom->index[i] = 0; /* reset this position */
    }
    return 1;
}
/* Get the value of the odometer */
size_t*
odom_indices(Odometer* odom)
{
    return odom->index;
}
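
/* Linearize the current index vector against max[] in row-major order. */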
size_t
odom_offset(Odometer* odom)
{
    size_t i,offset;

    offset = 0;
    for(i=0;i<odom->rank;i++) {
        offset *= odom->max[i];
        offset += odom->index[i];
    }
    return offset;
}
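
/* Format the odometer state into a static buffer (not reentrant). */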
const char*
odom_print(Odometer* odom)
{
    static char s[4096];
    static char tmp[4096];
    const char* sv;

    s[0] = '\0';
    snprintf(tmp,sizeof(tmp),"{rank=%u",(unsigned)odom->rank);
    strcat(s,tmp);
    strcat(s," start=("); sv = printvector(odom->rank,odom->start); strcat(s,sv); strcat(s,")");
    strcat(s," stop=("); sv = printvector(odom->rank,odom->stop); strcat(s,sv); strcat(s,")");
    strcat(s," max=("); sv = printvector(odom->rank,odom->max); strcat(s,sv); strcat(s,")");
    snprintf(tmp,sizeof(tmp)," offset=%u",(unsigned)odom_offset(odom)); strcat(s,tmp);
    strcat(s," indices=("); sv = printvector(odom->rank,odom->index); strcat(s,sv); strcat(s,")");
    strcat(s,"}");
    return s;
}
#ifdef DEBUG
char*
chunk_key(Format* format, size_t* indices)
{
    char key[NC_MAX_VAR_DIMS*3+1];
    size_t i;
    key[0] = '\0';
    for(i=0;i<format->rank;i++) {
        char s[3+1];
        if(i > 0) strlcat(key,".",sizeof(key));
        snprintf(s,sizeof(s),"%u",(unsigned)indices[i]);
        strlcat(key,s,sizeof(key));
    }
    return strdup(key);
}
#endif
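
/* Convert the odometer's chunk coordinates into that chunk's origin in
   element space: offset[i] = index[i] * chunksize[i]. */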
void
setoffset(Odometer* odom, size_t* chunksizes, size_t* offset)
{
    size_t i;
    for(i=0;i<odom->rank;i++)
        offset[i] = odom->index[i] * chunksizes[i];
}
static void
printindent(size_t indent)
{
    while(indent-- > 0) printf(" ");
}
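
/* Print an entire chunk as one linear sequence; fill values print as "_". */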
int
printchunklinear(Format* format, int* chunkdata, size_t indent)
{
    size_t i;
    for(i=0;i<format->chunkprod;i++) {
        if(chunkdata[i] == format->fillvalue)
            printf(" _");
        else
            printf(" %02d", chunkdata[i]);
    }
    printf("\n");
    return NC_NOERR;
}
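
/* Print a rank-2 chunk as a grid of rows and columns. */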
static void
printchunk2d(Format* format, int* chunkdata, size_t indent)
{
    size_t pos;
    size_t row;
    pos = 0;
    for(row=0;row<format->chunklens[0];row++) {
        size_t col;
        if(row > 0) printindent(indent);
        for(col=0;col<format->chunklens[1];col++,pos++) {
            if(chunkdata[pos] == format->fillvalue) printf(" _"); else printf(" %02d", chunkdata[pos]);
        }
        printf("\n");
    }
}
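
/* General chunk printer: lay the chunk out as up to three nested
   dimensions (planes, rows, columns). NC_UBYTE data is treated as a
   flat byte sequence and dumped byte-by-byte. */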
static void
printchunk(Format* format, int* chunkdata, size_t indent)
{
    size_t k[3];
    int rank = (int)format->rank;
    size_t cols[3], pos;
    size_t* chl = format->chunklens;

    memset(cols,0,sizeof(cols));

    if(format->xtype == NC_UBYTE) rank = 0;
    switch (rank) {
    case 1:
        cols[0] = 1;
        cols[1] = 1;
        cols[2] = chl[0];
        break;
    case 2:
        cols[0] = 1;
        cols[1] = chl[0];
        cols[2] = chl[1];
        break;
    case 3:
        cols[0] = chl[0];
        cols[1] = chl[1];
        cols[2] = chl[2];
        break;
    default:
        cols[0] = 1;
        cols[1] = 1;
        cols[2] = format->chunkprod;
        break;
    }
|
|
|
|
// offset = (((k0*chl[0])+k1)*chl[1])+k2;
|
|
|
|
pos = 0;
|
|
|
|
for(k[0]=0;k[0]<cols[0];k[0]++) {
|
|
|
|
if(k[0] > 0) printindent(indent);
|
|
|
|
k[1] = 0; k[2] = 0; /* reset */
|
|
|
|
for(k[1]=0;k[1]<cols[1];k[1]++) {
|
|
|
|
k[2] = 0;
|
|
|
|
if(k[1] > 0) printf(" |");
|
|
|
|
for(k[2]=0;k[2]<cols[2];k[2]++) {
|
Enhance/Fix filter support
re: Discussion https://github.com/Unidata/netcdf-c/discussions/2214
The primary change is to support so-called "standard filters".
A standard filter is one that is defined by the following
netcdf-c API:
````
int nc_def_var_XXX(int ncid, int varid, size_t nparams, unsigned* params);
int nc_inq_var_XXXX(int ncid, int varid, int* usefilterp, unsigned* params);
````
So for example, zstandard would be a standard filter by defining
the functions *nc_def_var_zstandard* and *nc_inq_var_zstandard*.
In order to define these functions, we need a new dispatch function:
````
int nc_inq_filter_avail(int ncid, unsigned filterid);
````
This function, combined with the existing filter API can be used
to implement arbitrary standard filters using a simple code pattern.
Note that I would have preferred that this function return a list
of all available filters, but HDF5 does not support that functionality.
So this PR implements the dispatch function and implements
the following standard functions:
+ bzip2
+ zstandard
+ blosc
Specific test cases are also provided for HDF5 and NCZarr.
Over time, other specific standard filters will be defined.
## Primary Changes
* Add nc_inq_filter_avail() to netcdf-c API.
* Add standard filter implementations to test use of *nc_inq_filter_avail*.
* Bump the dispatch table version number and add to all the relevant
dispatch tables (libsrc, libsrcp, etc).
* Create a program to invoke nc_inq_filter_avail so that it is accessible
to shell scripts.
* Cleanup szip support to properly support szip
when HDF5 is disabled. This involves detecting
libsz separately from testing if HDF5 supports szip.
* Integrate shuffle and fletcher32 into the existing
filter API. This means that, for example, nc_def_var_fletcher32
is now a wrapper around nc_def_var_filter.
* Extend the Codec defaulting to allow multiple default shared libraries.
## Misc. Changes
* Modify configure.ac/CMakeLists.txt to look for the relevant
libraries implementing standard filters.
* Modify libnetcdf.settings to list available standard filters
(including deflate and szip).
* Add CMake test modules to locate libbz2 and libzstd.
* Cleanup the HDF5 memory manager function use in the plugins.
* remove unused file include//ncfilter.h
* remove tests for the HDF5 memory operations e.g. H5allocate_memory.
* Add flag to ncdump to force use of _Filter instead of _Deflate
or _Shuffle or _Fletcher32. Used for testing.
2022-03-15 02:39:37 +08:00
|
|
|
if(format->xtype == NC_UBYTE) {
|
Mitigate S3 test interference + Unlimited Dimensions in NCZarr
This PR started as an attempt to add unlimited dimensions to NCZarr.
It did that, but this exposed significant problems with test interference.
So this PR is mostly about fixing -- well mitigating anyway -- test
interference.
The problem of test interference is now documented in the document docs/internal.md.
The solutions implemented here are also describe in that document.
The solution is somewhat fragile but multiple cleanup mechanisms
are provided. Note that this feature requires that the
AWS command line utility must be installed.
## Unlimited Dimensions.
The existing NCZarr extensions to Zarr are modified to support unlimited dimensions.
NCzarr extends the Zarr meta-data for the ".zgroup" object to include netcdf-4 model extensions. This information is stored in ".zgroup" as dictionary named "_nczarr_group".
Inside "_nczarr_group", there is a key named "dims" that stores information about netcdf-4 named dimensions. The value of "dims" is a dictionary whose keys are the named dimensions. The value associated with each dimension name has one of two forms
Form 1 is a special case of form 2, and is kept for backward compatibility. Whenever a new file is written, it uses format 1 if possible, otherwise format 2.
* Form 1: An integer representing the size of the dimension, which is used for simple named dimensions.
* Form 2: A dictionary with the following keys and values"
- "size" with an integer value representing the (current) size of the dimension.
- "unlimited" with a value of either "1" or "0" to indicate if this dimension is an unlimited dimension.
For Unlimited dimensions, the size is initially zero, and as variables extend the length of that dimension, the size value for the dimension increases.
That dimension size is shared by all arrays referencing that dimension, so if one array extends an unlimited dimension, it is implicitly extended for all other arrays that reference that dimension.
This is the standard semantics for unlimited dimensions.
Adding unlimited dimensions required a number of other changes to the NCZarr code-base. These included the following.
* Did a partial refactor of the slice handling code in zwalk.c to clean it up.
* Added a number of tests for unlimited dimensions derived from the same test in nc_test4.
* Added several NCZarr specific unlimited tests; more are needed.
* Add test of endianness.
## Misc. Other Changes
* Modify libdispatch/ncs3sdk_aws.cpp to optionally support use of the
AWS Transfer Utility mechanism. This is controlled by the
```#define TRANSFER```` command in that file. It defaults to being disabled.
* Parameterize both the standard Unidata S3 bucket (S3TESTBUCKET) and the netcdf-c test data prefix (S3TESTSUBTREE).
* Fixed an obscure memory leak in ncdump.
* Removed some obsolete unit testing code and test cases.
* Uncovered a bug in the netcdf-c handling of big-endian floats and doubles. Have not fixed yet. See tst_h5_endians.c.
* Renamed some nczarr_tests testcases to avoid name conflicts with nc_test4.
* Modify the semantics of zmap\#ncsmap_write to only allow total rewrite of objects.
* Modify the semantics of zodom to properly handle stride > 1.
* Add a truncate operation to the libnczarr zmap code.
2023-09-27 06:56:48 +08:00
|
|
|
size_t l;
|
Enhance/Fix filter support
re: Discussion https://github.com/Unidata/netcdf-c/discussions/2214
The primary change is to support so-called "standard filters".
A standard filter is one that is defined by the following
netcdf-c API:
````
int nc_def_var_XXX(int ncid, int varid, size_t nparams, unsigned* params);
int nc_inq_var_XXXX(int ncid, int varid, int* usefilterp, unsigned* params);
````
So for example, zstandard would be a standard filter by defining
the functions *nc_def_var_zstandard* and *nc_inq_var_zstandard*.
In order to define these functions, we need a new dispatch function:
````
int nc_inq_filter_avail(int ncid, unsigned filterid);
````
This function, combined with the existing filter API can be used
to implement arbitrary standard filters using a simple code pattern.
Note that I would have preferred that this function return a list
of all available filters, but HDF5 does not support that functionality.
So this PR implements the dispatch function and implements
the following standard functions:
+ bzip2
+ zstandard
+ blosc
Specific test cases are also provided for HDF5 and NCZarr.
Over time, other specific standard filters will be defined.
## Primary Changes
* Add nc_inq_filter_avail() to netcdf-c API.
* Add standard filter implementations to test use of *nc_inq_filter_avail*.
* Bump the dispatch table version number and add to all the relevant
dispatch tables (libsrc, libsrcp, etc).
* Create a program to invoke nc_inq_filter_avail so that it is accessible
to shell scripts.
* Cleanup szip support to properly support szip
when HDF5 is disabled. This involves detecting
libsz separately from testing if HDF5 supports szip.
* Integrate shuffle and fletcher32 into the existing
filter API. This means that, for example, nc_def_var_fletcher32
is now a wrapper around nc_def_var_filter.
* Extend the Codec defaulting to allow multiple default shared libraries.
## Misc. Changes
* Modify configure.ac/CMakeLists.txt to look for the relevant
libraries implementing standard filters.
* Modify libnetcdf.settings to list available standard filters
(including deflate and szip).
* Add CMake test modules to locate libbz2 and libzstd.
* Cleanup the HDF5 memory manager function use in the plugins.
* remove unused file include//ncfilter.h
* remove tests for the HDF5 memory operations e.g. H5allocate_memory.
* Add flag to ncdump to force use of _Filter instead of _Deflate
or _Shuffle or _Fletcher32. Used for testing.
2022-03-15 02:39:37 +08:00
|
|
|
unsigned char* bchunkdata = (unsigned char*)(&chunkdata[pos]);
|
|
|
|
for(l=0;l<sizeof(int);l++) {
|
Mitigate S3 test interference + Unlimited Dimensions in NCZarr
This PR started as an attempt to add unlimited dimensions to NCZarr.
It did that, but this exposed significant problems with test interference.
So this PR is mostly about fixing -- well mitigating anyway -- test
interference.
The problem of test interference is now documented in the document docs/internal.md.
The solutions implemented here are also describe in that document.
The solution is somewhat fragile but multiple cleanup mechanisms
are provided. Note that this feature requires that the
AWS command line utility must be installed.
## Unlimited Dimensions.
The existing NCZarr extensions to Zarr are modified to support unlimited dimensions.
NCzarr extends the Zarr meta-data for the ".zgroup" object to include netcdf-4 model extensions. This information is stored in ".zgroup" as dictionary named "_nczarr_group".
Inside "_nczarr_group", there is a key named "dims" that stores information about netcdf-4 named dimensions. The value of "dims" is a dictionary whose keys are the named dimensions. The value associated with each dimension name has one of two forms
Form 1 is a special case of form 2, and is kept for backward compatibility. Whenever a new file is written, it uses format 1 if possible, otherwise format 2.
* Form 1: An integer representing the size of the dimension, which is used for simple named dimensions.
* Form 2: A dictionary with the following keys and values"
- "size" with an integer value representing the (current) size of the dimension.
- "unlimited" with a value of either "1" or "0" to indicate if this dimension is an unlimited dimension.
For Unlimited dimensions, the size is initially zero, and as variables extend the length of that dimension, the size value for the dimension increases.
That dimension size is shared by all arrays referencing that dimension, so if one array extends an unlimited dimension, it is implicitly extended for all other arrays that reference that dimension.
This is the standard semantics for unlimited dimensions.
Adding unlimited dimensions required a number of other changes to the NCZarr code-base. These included the following.
* Did a partial refactor of the slice handling code in zwalk.c to clean it up.
* Added a number of tests for unlimited dimensions derived from the same test in nc_test4.
* Added several NCZarr specific unlimited tests; more are needed.
* Add test of endianness.
## Misc. Other Changes
* Modify libdispatch/ncs3sdk_aws.cpp to optionally support use of the
AWS Transfer Utility mechanism. This is controlled by the
```#define TRANSFER```` command in that file. It defaults to being disabled.
* Parameterize both the standard Unidata S3 bucket (S3TESTBUCKET) and the netcdf-c test data prefix (S3TESTSUBTREE).
* Fixed an obscure memory leak in ncdump.
* Removed some obsolete unit testing code and test cases.
* Uncovered a bug in the netcdf-c handling of big-endian floats and doubles. Have not fixed yet. See tst_h5_endians.c.
* Renamed some nczarr_tests testcases to avoid name conflicts with nc_test4.
* Modify the semantics of zmap\#ncsmap_write to only allow total rewrite of objects.
* Modify the semantics of zodom to properly handle stride > 1.
* Add a truncate operation to the libnczarr zmap code.
2023-09-27 06:56:48 +08:00
|
|
|
if(bchunkdata[l] == format->fillvalue)
|
|
|
|
printf(" _");
|
|
|
|
else
|
|
|
|
printf(" %02u", bchunkdata[l]);
|
                    }
                } else {
                    if(chunkdata[pos] == format->fillvalue)
                        printf(" _");
                    else
                        printf(" %02d", chunkdata[pos]);
                }
                pos++;
            }
        }
        printf("\n");
    }

#if 0
    /* Disabled alternative: dump the whole chunk as one linear row. */
    for(k=0;k<format->chunkprod;k++) {
        if(k > 0 && k % cols == 0) printf(" |");
        printf(" %02d", chunkdata[k]);
    }
    printf("\n");
#endif
}
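
/* Read and print every chunk of the selected variable: walk the chunk
 * index space with an odometer, read each chunk directly with
 * H5Dread_chunk (HDF5) or NCZ_read_chunk (NCZarr), substitute the hole
 * value for chunks that were never written, and print each chunk tagged
 * with its chunk indices and origin offsets. */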
int
dump(Format* format)
{
    void* chunkdata = NULL; /*[CHUNKPROD];*/
    Odometer* odom = NULL;
    size_t r;
    size_t offset[NC_MAX_VAR_DIMS];
    int holechunk = 0;
    char sindices[64];
#ifdef H5
    size_t i;
    hid_t fileid, grpid, datasetid;
    hid_t dxpl_id = H5P_DEFAULT; /* data transfer property list */
    unsigned int filter_mask = 0;
    hsize_t hoffset[NC_MAX_VAR_DIMS];
#endif
#ifdef NZ
    int stat = NC_NOERR;
    size64_t zindices[NC_MAX_VAR_DIMS];
    int ncid, varid;
#endif

#ifdef H5
    if(format->debug) {
        H5Eset_auto2(H5E_DEFAULT,(H5E_auto2_t)H5Eprint1,stderr);
    }
#endif

    memset(offset,0,sizeof(offset));
#ifdef H5
    memset(hoffset,0,sizeof(hoffset));
#endif
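
    /* Open the input and locate the target variable via the API that
       matches the file's extended format. */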
    switch (format->format) {
#ifdef H5
    case NC_FORMATX_NC_HDF5:
        if ((fileid = H5Fopen(format->file_name, H5F_ACC_RDONLY, H5P_DEFAULT)) < 0) usage(NC_EHDFERR);
        if ((grpid = H5Gopen1(fileid, "/")) < 0) usage(NC_EHDFERR);
        if ((datasetid = H5Dopen1(grpid, format->var_name)) < 0) usage(NC_EHDFERR);
        break;
#endif
#ifdef NZ
    case NC_FORMATX_NCZARR:
        if((stat=nc_open(format->file_name,0,&ncid))) usage(stat);
        if((stat=nc_inq_varid(ncid,format->var_name,&varid))) usage(stat);
        break;
#endif
    default: usage(NC_EINVAL);
    }

    if((odom = odom_new(format->rank,format->chunkcounts,format->dimlens))==NULL) usage(NC_ENOMEM);

    if((chunkdata = calloc(format->chunkprod,sizeof(int)))==NULL) usage(NC_ENOMEM);

    printf("rank=%zu dims=(%s) chunks=(%s)\n",format->rank,printvector(format->rank,format->dimlens),
           printvector(format->rank,format->chunklens));
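
    /* Walk every chunk position in the variable's chunk index space. */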
    while(odom_more(odom)) {
        setoffset(odom,format->chunklens,offset);

#ifdef DEBUG
        fprintf(stderr,"odom=%s\n",odom_print(odom));
        fprintf(stderr,"offset=(");
        for(i=0;i<format->rank;i++)
            fprintf(stderr,"%s%lu",(i > 0 ? "," : ""),(unsigned long)offset[i]);
        fprintf(stderr,")\n");
        fflush(stderr);
#endif

        if(format->debug) {
            fprintf(stderr,"chunk: %s\n",printvector(format->rank,offset));
        }

        /* Read the raw chunk at this position; flag it if it is a hole. */
        holechunk = 0;
        switch (format->format) {
#ifdef H5
        case NC_FORMATX_NC_HDF5: {
            for(i=0;i<format->rank;i++) hoffset[i] = (hsize_t)offset[i];
            if(H5Dread_chunk(datasetid, dxpl_id, hoffset, &filter_mask, chunkdata) < 0)
                holechunk = 1;
        } break;
#endif
#ifdef NZ
        case NC_FORMATX_NCZARR:
            for(r=0;r<format->rank;r++) zindices[r] = (size64_t)odom->index[r];
            switch (stat=NCZ_read_chunk(ncid, varid, zindices, chunkdata)) {
            case NC_NOERR: break;
            case NC_EEMPTY: holechunk = 1; break;
            default: usage(stat);
            }
            break;
#endif
        default: usage(NC_EINVAL);
        }
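
        /* Chunks that were never written have no storage; fill the buffer
           with the hole value so they print distinctly. */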
        if(holechunk) {
            /* Hole chunk: use holevalue */
            size_t i = 0;
            int* idata = (int*)chunkdata;
            for(i=0;i<format->chunkprod;i++)
                idata[i] = format->holevalue;
        }

        /* Build the "[chunk index/origin offset]" prefix for this chunk. */
        sindices[0] = '\0';
        for(r=0;r<format->rank;r++) {
            char sstep[64];
            snprintf(sstep,sizeof(sstep),"[%lu/%lu]",(unsigned long)odom->index[r],(unsigned long)offset[r]);
            strcat(sindices,sstep);
        }
        strcat(sindices," =");
        printf("%s",sindices);
        if(format->linear)
            printchunklinear(format,chunkdata,strlen(sindices));
        else if(format->rank == 2)
            printchunk2d(format,chunkdata,strlen(sindices));
        else
            printchunk(format,chunkdata,strlen(sindices));
        fflush(stdout);
        odom_next(odom);
    }

    /* Close up. */
    switch (format->format) {
#ifdef H5
    case NC_FORMATX_NC_HDF5:
        if (H5Dclose(datasetid) < 0) abort();
        if (H5Gclose(grpid) < 0) abort();
        if (H5Fclose(fileid) < 0) abort();
        break;
#endif
#ifdef NZ
    case NC_FORMATX_NCZARR:
        if((stat=nc_close(ncid))) usage(stat);
        break;
#endif
    default: usage(NC_EINVAL);
    }

    /* Cleanup */
    free(chunkdata);
    odom_free(odom);
    return 0;
}
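
/* File extensions that select an NCZarr zmap implementation. */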
static const char* urlexts[] = {"file", "zip", NULL};
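
/* If f0 is not already a URL and its extension names a known zmap
 * implementation, wrap it as an NCZarr URL, e.g. "data.zip" ->
 * "file://data.zip#mode=nczarr,zip"; otherwise return f0 unchanged. */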
static const char*
filenamefor(const char* f0)
{
    static char result[4096];
    const char** extp;
    char* p;

    strcpy(result,f0); /* default */
    if(nc__testurl(f0,NULL)) goto done;
    /* Not a URL */
    p = strrchr(f0,'.'); /* look at the extension, if any */
    if(p == NULL) goto done; /* No extension */
    p++;
    for(extp=urlexts;*extp;extp++) {
        if(strcmp(p,*extp)==0) break;
    }
    if(*extp == NULL) goto done; /* not found */
    /* Assemble the url */
    strcpy(result,"file://");
    strcat(result,f0); /* core path */
    strcat(result,"#mode=nczarr,");
    strcat(result,*extp);
done:
    return result;
}
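
/* Options: -b treat chunk data as unsigned bytes; -h print usage;
 * -v <name> select the variable to dump; -D enable debug output;
 * -L print each chunk linearly; -T <level> set the nctrace level.
 * The remaining argument is the input file, e.g. (assuming the binary
 * is named ncdumpchunks): ncdumpchunks -v v file.nc */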
int
main(int argc, char** argv)
{
    int stat = NC_NOERR;
    size_t i;
    Format format;
    int ncid, varid, dimids[NC_MAX_VAR_DIMS];
    int vtype, storage;
    int mode;
    int c;
    int r;

    memset(&format,0,sizeof(format));

    /* Init some format fields */
    format.xtype = NC_INT;
    format.holevalue = -2;

    while ((c = getopt(argc, argv, "bhv:DT:L")) != EOF) {
        switch(c) {
        case 'b':
            format.xtype = NC_UBYTE;
            break;
        case 'h':
            usage(0);
            break;
        case 'v':
            strcpy(format.var_name,optarg);
            break;
        case 'D':
            format.debug = 1;
            break;
        case 'L':
            format.linear = 1;
            break;
        case 'T':
            nctracelevel(atoi(optarg));
            break;
        case '?':
            fprintf(stderr,"unknown option: '%c'\n",c);
            exit(1);
        }
    }

    /* get file argument */
    argc -= optind;
    argv += optind;

    if (argc == 0) {
        fprintf(stderr, "no input file specified\n");
        exit(1);
    }

    {
        char* s = NC_shellUnescape(argv[0]);
        strcpy(format.file_name,filenamefor(s));
        nullfree(s);
    }

    if(strlen(format.file_name) == 0) {
        fprintf(stderr, "no input file specified\n");
        exit(1);
    }

    if(strlen(format.var_name) == 0) {
        fprintf(stderr, "no input var specified\n");
        exit(1);
    }

    /* Get info about the file type */
    if((stat=nc_open(format.file_name,0,&ncid))) usage(stat);

    if((stat=nc_inq_format_extended(ncid,&format.format,&mode))) usage(stat);

    /* Get the info about the var */
    if((stat=nc_inq_varid(ncid,format.var_name,&varid))) usage(stat);
    if((stat=nc_inq_var(ncid,varid,NULL,&vtype,&r,dimids,NULL))) usage(stat);
    format.rank = (size_t)r;
    if(format.rank == 0) usage(NC_EDIMSIZE);
    if((stat=nc_inq_var_chunking(ncid,varid,&storage,format.chunklens))) usage(stat);
    if(storage != NC_CHUNKED) usage(NC_EBADCHUNK);
    if((stat=nc_get_att(ncid,varid,"_FillValue",&format.fillvalue))) usage(stat);

    for(i=0;i<format.rank;i++) {
        if((stat=nc_inq_dimlen(ncid,dimids[i],&format.dimlens[i]))) usage(stat);
        format.chunkcounts[i] = ceildiv(format.dimlens[i],format.chunklens[i]);
    }

    if((stat=nc_close(ncid))) usage(stat);

    /* Precompute the number of values per chunk and per variable */
    for(format.chunkprod=1,i=0;i<format.rank;i++) format.chunkprod *= format.chunklens[i];
    for(format.dimprod=1,i=0;i<format.rank;i++) format.dimprod *= format.dimlens[i];

    dump(&format);

    cleanup();

    return 0;
}