netcdf-c/nczarr_test/zs3parse.c

170 lines
3.7 KiB
C
Raw Normal View History

/*
* Copyright 2018, University Corporation for Atmospheric Research
* See netcdf/COPYRIGHT file for copying and redistribution conditions.
*/
#include "config.h"
#include <stdlib.h>
#include <stdio.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef HAVE_GETOPT_H
#include <getopt.h>
#endif
#if defined(_WIN32) && !defined(__MINGW32__)
#include "XGetopt.h"
#endif
#include "zincludes.h"
#include "ncpathmgr.h"
Add support for setting HDF5 alignment property when creating a file re: https://github.com/Unidata/netcdf-c/issues/2177 re: https://github.com/Unidata/netcdf-c/pull/2178 Provide get/set functions to store global data alignment information and apply it when a file is created. The api is as follows: ```` int nc_set_alignment(int threshold, int alignment); int nc_get_alignment(int* thresholdp, int* alignmentp); ```` If defined, then for every file created or opened after the call to nc_set_alignment, for every new variable added to the file, the most recently set threshold and alignment values will be applied to that variable. The nc_get_alignment function returns the last values set by nc_set_alignment. If nc_set_alignment has not been called, then it returns the value 0 for both threshold and alignment. The alignment parameters are stored in the NCglobalstate object (see below) for use as needed. Repeated calls to nc_set_alignment will overwrite any existing values in NCglobalstate. The alignment parameters are applied in libhdf5/hdf5create.c and libhdf5/hdf5open.c. The set/get alignment functions are defined in libsrc4/nc4internal.c. A test program was added as nc_test4/tst_alignment.c. ## Misc. Changes Unrelated to Alignment * The NCRCglobalstate type was renamed to NCglobalstate to indicate that it represented more general global state than just .rc data. It was also moved to nc4internal.h. This led to a large number of small changes: mostly renaming. The global state management functions were moved to nc4internal.c. * The global chunk cache variables have been moved into NCglobalstate. As warranted, other global state will be moved as well. * Some misc. problems with the nczarr performance tests were corrected.
2022-01-30 06:27:52 +08:00
/* Ensure DEBUG is not defined for the remainder of this file */
#undef DEBUG
/* Host-name suffix used by Amazon endpoints.
   NOTE(review): not referenced in the visible code -- confirm whether it is still needed */
#define AWSHOST ".amazonaws.com"
/* Which component of the URL the program should print */
typedef enum S3op {
    S3_NONE = 0,  /* nothing selected; the state left by zero-filling */
    S3_HOST,      /* 1: host part of the URL */
    S3_BUCKET,    /* 2: first segment of the URL path */
    S3_KEY        /* 3: remainder of the URL path */
} S3op;
/* State collected from the command line */
struct S3options {
    int   debug; /* zero-filled by main(); no visible option sets it */
    S3op  op;    /* component of the URL to print */
    char* url;   /* heap copy of the url|file argument */
};

/* The single, program-wide instance of the options */
struct S3options s3options;
/* Forward declaration: processurl() is defined after main().
   It parses `url` and stores the component selected by `op` in *piece. */
static int processurl(S3op op, const char* url, char** piece);
/* Write the usage summary to stderr and terminate with exit status 1. */
static void
zs3usage(void)
{
    static const char usage[] = "usage: zs3parse [-h|-b|-k] <url>|<file>\n";

    fputs(usage, stderr);
    exit(1);
}
int
main(int argc, char** argv)
{
int stat = NC_NOERR;
int c;
char* piece = NULL;
memset((void*)&s3options,0,sizeof(s3options));
while ((c = getopt(argc, argv, "vhbk")) != EOF) {
switch(c) {
case 'b':
s3options.op = S3_BUCKET;
break;
case 'h':
s3options.op = S3_HOST;
break;
case 'k':
s3options.op = S3_KEY;
break;
case 'v':
zs3usage();
goto done;
case '?':
fprintf(stderr,"unknown option: %c\n",c);
goto fail;
}
}
/* get url|file argument */
argc -= optind;
argv += optind;
if (argc > 1) {
fprintf(stderr, "zs3parse: only one url|file argument permitted\n");
goto fail;
}
if (argc == 0) {
fprintf(stderr, "zs3parse: no url|file specified\n");
goto fail;
}
Codify cross-platform file paths The netcdf-c code has to deal with a variety of platforms: Windows, OSX, Linux, Cygwin, MSYS, etc. These platforms differ significantly in the kind of file paths that they accept. So in order to handle this, I have created a set of replacements for the most common file system operations such as _open_ or _fopen_ or _access_ to manage the file path differences correctly. A more limited version of this idea was already implemented via the ncwinpath.h and dwinpath.c code. So this can be viewed as a replacement for that code. And in path in many cases, the only change that was required was to replace '#include <ncwinpath.h>' with '#include <ncpathmgt.h>' and then replace file operation calls with the NCxxx equivalent from ncpathmgr.h Note that recently, the ncwinpath.h was renamed ncpathmgmt.h, so this pull request should not require dealing with winpath. The heart of the change is include/ncpathmgmt.h, which provides alternate operations such as NCfopen or NCaccess and which properly parse and rebuild path arguments to work for the platform on which the code is executing. This mostly matters for Windows because of the way that it uses backslash and drive letters, as compared to *nix*. One important feature is that the user can do string manipulations on a file path without having to worry too much about the platform because the path management code will properly handle most mixed cases. So one can for example concatenate a path suffix that uses forward slashes to a Windows path and have it work correctly. The conversion code is in libdispatch/dpathmgr.c, and the important function there is NCpathcvt which does the proper conversions to the local path format. As a rule, most code should just replace their file operations with the corresponding NCxxx ones defined in include/ncpathmgmt.h. These NCxxx functions all call NCpathcvt on their path arguments before executing the actual file operation. 
In some rare cases, the client may need to directly use NCpathcvt, but this should be avoided as much as possible. If there is a need for supporting a new file operation not already in ncpathmgmt.h, then use the code in dpathmgr.c as a template. Also please notify Unidata so we can include it as a formal part or our supported operations. Also, if you see an operation in the library that is not using the NCxxx form, then please submit an issue so we can fix it. Misc. Changes: * Clean up the utf8 testing code; it is impossible to get some tests to work under windows using shell scripts; the args do not pass as utf8 but as some other encoding. * Added an extra utf8 test case: test_unicode_path.sh * Add a true test for HDF5 1.10.6 or later because as noted in PR https://github.com/Unidata/netcdf-c/pull/1794, HDF5 changed its Windows file path handling.
2021-03-05 04:41:31 +08:00
s3options.url = strdup(argv[0]);
stat = processurl(s3options.op, s3options.url, &piece);
if(stat == NC_NOERR) {
if(piece == NULL) goto fail;
printf("%s",piece);
}
done:
nullfree(piece);
/* Reclaim s3options */
nullfree(s3options.url);
if(stat)
fprintf(stderr,"fail: %s\n",nc_strerror(stat));
return (stat ? 1 : 0);
fail:
stat = NC_EINVAL;
goto done;
}
/**
 * Parse a URL and hand back one of its components.
 *
 * The URL must use the "http" or "https" protocol and have a non-empty
 * host and path. The path is split on '/'; the first segment is taken as
 * the bucket and the remaining segments are re-joined with nczm_join()
 * to form the key.
 *
 * @param op    which component to return: S3_HOST, S3_BUCKET or S3_KEY
 * @param surl  the URL text to parse
 * @param piece if non-NULL, receives a heap-allocated string holding the
 *              requested component; the caller releases it (main() uses
 *              nullfree). Left untouched on failure.
 * @return NC_NOERR on success; NC_EURL if the URL cannot be parsed, fails
 *         verification, or `op` is not one of the three components;
 *         NC_ENOMEM if an allocation fails; otherwise whatever
 *         nczm_split_delim() or nczm_join() reports.
 */
static int
processurl(S3op op, const char* surl, char** piece)
{
    int stat = NC_NOERR;
    NClist* segments = NULL;
    char* value = NULL;
    char* host = NULL;
    char* bucket = NULL;
    char* prefix = NULL;
    NCURI* url = NULL;

    /* The parse result is judged by whether `url` was filled in */
    ncuriparse(surl,&url);
    if(url == NULL)
        {stat = NC_EURL; goto done;}

    /* do some verification */
    if(url->protocol == NULL) /* guard the strcmp calls below */
        {stat = NC_EURL; goto done;}
    if(strcmp(url->protocol,"https") != 0
       && strcmp(url->protocol,"http") != 0)
        {stat = NC_EURL; goto done;}
    if(url->host == NULL || strlen(url->host) == 0)
        {stat = NC_EURL; goto done;}
    if((host = strdup(url->host))==NULL)
        {stat = NC_ENOMEM; goto done;}

    /* We have to process the path to get the bucket,
       and remove it from the path */
    if(url->path == NULL || strlen(url->path) == 0)
        {stat = NC_EURL; goto done;}

    /* split the path by "/" */
    if((segments = nclistnew()) == NULL)
        {stat = NC_ENOMEM; goto done;}
    if((stat = nczm_split_delim(url->path,'/',segments))) goto done;
    if(nclistlength(segments) == 0)
        {stat = NC_EURL; goto done;}

    /* Removing element 0 takes the bucket out of the list, so the
       nclistfreeall() calls below no longer cover it; it is released
       through `bucket` or `value` instead */
    bucket = ((char*)nclistremove(segments,0));
    if((stat = nczm_join(segments,&prefix))) goto done;
    nclistfreeall(segments); segments = NULL;

    /* Move the selected string into `value`; clearing the source pointer
       keeps the cleanup code from freeing it a second time */
    switch (op) {
    case S3_HOST: value = host; host = NULL; break;
    case S3_BUCKET: value = bucket; bucket = NULL; break;
    case S3_KEY: value = prefix; prefix = NULL; break;
    default: stat = NC_EURL; goto done;
    }
    if(piece) {*piece = value; value = NULL;}

done:
    ncurifree(url);
    nullfree(value);
    nullfree(host);
    nullfree(bucket);
    nullfree(prefix);
    nclistfreeall(segments);
    return stat;
}