netcdf-c/nczarr_test/zs3parse.c
Dennis Heimbigner 36102e3c32 Improve UTF8 Support On Windows
re: Issue https://github.com/Unidata/netcdf-c/issues/2190

The primary purpose of this PR is to improve the utf8 support
for Windows. This is pursuant to a change in Windows that
supports utf8 natively (almost). The almost means that it is
still utf16 internally and the set of characters representable
by utf8 is larger than those representable by utf16.

This leaves open the question in the Issue about handling
the Windows 1252 character set.

This required the following changes:

1. Test the Windows build and major version in order to see if
   native utf8 is supported.
2. If native utf8 is supported, modify dpathmgr.c to call the 8-bit
   version of the windows fopen() and open() functions.
3. In support of this, programs that use XGetOpt (Windows versions)
   need to get the command line as utf8 and then parse to
   argc+argv as utf8. This requires using a homegrown command line parser
   named XCommandLineToArgvA.
4. Add a utility program called "acpget" that prints out the
   current Windows code page and locale.

Additionally, some technical debt was cleaned up as follows:

1. Unify all the places which attempt to read all or a part
   of a file into the dutil.c#NC_readfile code.
2. Similarly unify all the code that creates temp files into
   dutil.c#NC_mktmp code.
3. Convert almost all remaining calls to fopen() and open()
   to NCfopen() and NCopen3(). This is to ensure that path management
   is used consistently. This touches a number of files.
4. extern->EXTERNL as needed to get it to work under Windows.
2022-02-08 20:53:30 -07:00

170 lines
3.7 KiB
C

/*
* Copyright 2018, University Corporation for Atmospheric Research
* See netcdf/COPYRIGHT file for copying and redistribution conditions.
*/
#include "config.h"
#include <stdlib.h>
#include <stdio.h>
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef HAVE_GETOPT_H
#include <getopt.h>
#endif
#if defined(_WIN32) && !defined(__MINGW32__)
#include "XGetopt.h"
#endif
#include "zincludes.h"
#include "ncpathmgr.h"
#undef DEBUG
#define AWSHOST ".amazonaws.com"
/* Selects which component of the URL the program prints. */
typedef enum S3op {
    S3_NONE   = 0, /* nothing selected (value left by zero-initialization) */
    S3_HOST   = 1, /* host name; chosen with -h */
    S3_BUCKET = 2, /* bucket, i.e. first path segment; chosen with -b */
    S3_KEY    = 3  /* remainder of the path after the bucket; chosen with -k */
} S3op;
/* Settings gathered from the command line; main() zeroes and fills this. */
struct S3options {
    int debug;  /* debug flag; the option parsing shown in main() never sets it */
    S3op op;    /* which URL component to print */
    char *url;  /* heap copy of the url|file argument; released by main() */
} s3options;
/* Forward declaration: processurl() is defined after main(), which calls it.
   It parses url and, on success, stores the component selected by op in *piece. */
static int processurl(S3op op, const char* url, char** piece);
/* Write the command synopsis to stderr and terminate with exit status 1.
   This function does not return. */
static void
zs3usage(void)
{
    static const char synopsis[] = "usage: zs3parse [-h|-b|-k] <url>|<file>\n";

    fputs(synopsis, stderr);
    exit(1);
}
int
main(int argc, char** argv)
{
int stat = NC_NOERR;
int c;
char* piece = NULL;
memset((void*)&s3options,0,sizeof(s3options));
while ((c = getopt(argc, argv, "vhbk")) != EOF) {
switch(c) {
case 'b':
s3options.op = S3_BUCKET;
break;
case 'h':
s3options.op = S3_HOST;
break;
case 'k':
s3options.op = S3_KEY;
break;
case 'v':
zs3usage();
goto done;
case '?':
fprintf(stderr,"unknown option: %c\n",c);
goto fail;
}
}
/* get url|file argument */
argc -= optind;
argv += optind;
if (argc > 1) {
fprintf(stderr, "zs3parse: only one url|file argument permitted\n");
goto fail;
}
if (argc == 0) {
fprintf(stderr, "zs3parse: no url|file specified\n");
goto fail;
}
s3options.url = strdup(argv[0]);
stat = processurl(s3options.op, s3options.url, &piece);
if(stat == NC_NOERR) {
if(piece == NULL) goto fail;
printf("%s",piece);
}
done:
nullfree(piece);
/* Reclaim s3options */
nullfree(s3options.url);
if(stat)
fprintf(stderr,"fail: %s\n",nc_strerror(stat));
return (stat ? 1 : 0);
fail:
stat = NC_EINVAL;
goto done;
}
/*
 * Extract one component of an http(s) URL.
 *
 * op    - which component to return: S3_HOST, S3_BUCKET or S3_KEY
 * surl  - text of the URL to parse
 * piece - if non-NULL, receives the selected component as a string that the
 *         caller releases (main() does so with nullfree); if NULL the result
 *         is discarded
 *
 * The first "/"-separated segment of the URL path is taken as the bucket;
 * the remaining segments, rejoined by nczm_join(), form the key.
 *
 * Returns NC_NOERR on success;
 *         NC_EURL if the URL cannot be parsed, is not http/https, has an
 *                 empty host or path, or op is not one of the three
 *                 selectable components;
 *         NC_ENOMEM if an allocation fails;
 *         otherwise whatever nczm_split_delim() or nczm_join() reports.
 */
static int
processurl(S3op op, const char* surl, char** piece)
{
    int stat = NC_NOERR;
    NClist* segments = NULL;
    char* value = NULL;
    char* host = NULL;
    char* bucket = NULL;
    char* prefix = NULL;
    NCURI* url = NULL;

    ncuriparse(surl,&url);
    if(url == NULL)
        {stat = NC_EURL; goto done;}

    /* do some verification: only http and https are accepted */
    if(url->protocol == NULL
       || (strcmp(url->protocol,"https") != 0
           && strcmp(url->protocol,"http") != 0))
        {stat = NC_EURL; goto done;}
    if(url->host == NULL || strlen(url->host) == 0)
        {stat = NC_EURL; goto done;}
    if((host = strdup(url->host))==NULL)
        {stat = NC_ENOMEM; goto done;}

    /* We have to process the path to get the bucket,
       and remove it from the path */
    if(url->path == NULL || strlen(url->path) == 0)
        {stat = NC_EURL; goto done;}

    /* split the path by "/" */
    if((segments = nclistnew()) == NULL)
        {stat = NC_ENOMEM; goto done;}
    if((stat = nczm_split_delim(url->path,'/',segments))) goto done;
    if(nclistlength(segments) == 0)
        {stat = NC_EURL; goto done;}

    /* The leading segment is the bucket; what is left becomes the key */
    bucket = ((char*)nclistremove(segments,0));
    if((stat = nczm_join(segments,&prefix))) goto done;
    nclistfreeall(segments); segments = NULL;

    /* Move ownership of the selected string into value so that the cleanup
       below frees only the strings that were not selected */
    switch (op) {
    case S3_HOST: value = host; host = NULL; break;
    case S3_BUCKET: value = bucket; bucket = NULL; break;
    case S3_KEY: value = prefix; prefix = NULL; break;
    default: stat = NC_EURL; goto done;
    }
    if(piece) {*piece = value; value = NULL;}

done:
    ncurifree(url);
    nullfree(value);
    nullfree(host);
    nullfree(bucket);
    nullfree(prefix);
    nclistfreeall(segments);
    return stat;
}