netcdf-c/nczarr_test/zmapio.c
Dennis Heimbigner 36102e3c32 Improve UTF8 Support On Windows
re: Issue https://github.com/Unidata/netcdf-c/issues/2190

The primary purpose of this PR is to improve the utf8 support
for windows. This is pursuant to a change in Windows that
supports utf8 natively (almost). The almost means that it is
still utf16 internally and the set of characters representable
by utf8 is larger than those representable by utf16.

This leaves open the question in the Issue about handling
the Windows 1252 character set.

This required the following changes:

1. Test the Windows build and major version in order to see if
   native utf8 is supported.
2. If native utf8 is supported, Modify dpathmgr.c to call the 8-bit
   version of the windows fopen() and open() functions.
3. In support of this, programs that use XGetOpt (Windows versions)
   need to get the command line as utf8 and then parse to
   argc+argv as utf8. This requires using a homegrown command line parser
   named XCommandLineToArgvA.
4. Add a utility program called "acpget" that prints out the
   current Windows code page and locale.

Additionally, some technical debt was cleaned up as follows:

1. Unify all the places which attempt to read all or a part
   of a file into the dutil.c#NC_readfile code.
2. Similarly unify all the code that creates temp files into
   dutil.c#NC_mktmp code.
3. Convert almost all remaining calls to fopen() and open()
   to NCfopen() and NCopen3(). This is to ensure that path management
   is used consistently. This touches a number of files.
4. extern->EXTERNL as needed to get it to work under Windows.
2022-02-08 20:53:30 -07:00

560 lines
13 KiB
C

/*
* Copyright 2018, University Corporation for Atmospheric Research
* See netcdf/COPYRIGHT file for copying and redistribution conditions.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#ifdef HAVE_UNISTD_H
#include <unistd.h>
#endif
#ifdef HAVE_GETOPT_H
#include <getopt.h>
#endif
#if defined(_WIN32) && !defined(__MINGW32__)
#include "XGetopt.h"
#endif
#include "zincludes.h"
#include "ncpathmgr.h"
#include "nclog.h"
#include "ncuri.h"
#undef DEBUG
#define DATANAME "data"
/* Operations that can be requested with -x; MOP_NONE means "no/unknown operation". */
typedef enum Mapop {
    MOP_NONE = 0,
    MOP_OBJDUMP,    /* 1 */
    MOP_CLEAR       /* 2 */
} Mapop;
/* Classification of a map key, as computed by keykind(). */
typedef enum OBJKIND {
    OK_NONE = 0,
    OK_META,      /* 1 */
    OK_CHUNK,     /* 2 */
    OK_GROUP,     /* 3 */
    OK_IGNORE     /* 4 */
} OBJKIND;
/* Name -> operation table searched by decodeop(); the entry with a NULL opname ends it. */
static struct Mops {
    Mapop mapop;
    const char* opname;
} mapops[] = {
    { .mapop = MOP_NONE,    .opname = "none"    },
    { .mapop = MOP_OBJDUMP, .opname = "objdump" },
    { .mapop = MOP_CLEAR,   .opname = "clear"   },
    { .mapop = MOP_NONE,    .opname = NULL      }
};
/*
 * Element types accepted by -t, searched by name in decodetype().
 * The entry with a NULL typename ends the table; types[0] is the
 * default used by main() when -t is not given.
 */
static struct Type {
    const char* typename;
    nc_type nctype;
    int typesize;
} types[] = {
    { .typename = "ubyte",  .nctype = NC_UBYTE,  .typesize = 1 },
    { .typename = "byte",   .nctype = NC_BYTE,   .typesize = 1 },
    { .typename = "ushort", .nctype = NC_USHORT, .typesize = 2 },
    { .typename = "short",  .nctype = NC_SHORT,  .typesize = 2 },
    { .typename = "uint",   .nctype = NC_UINT,   .typesize = 4 },
    { .typename = "int",    .nctype = NC_INT,    .typesize = 4 },
    { .typename = "uint64", .nctype = NC_UINT64, .typesize = 8 },
    { .typename = "int64",  .nctype = NC_INT64,  .typesize = 8 },
    { .typename = "float",  .nctype = NC_FLOAT,  .typesize = 4 },
    { .typename = "double", .nctype = NC_DOUBLE, .typesize = 8 },
    { .typename = NULL,     .nctype = NC_NAT,    .typesize = 0 }
};
/* Command line options: zeroed and filled in by main(), read by the other routines */
struct Dumpptions {
/* Set by -d: objdump() prints the collected key stack to stderr */
int debug;
/* Set by -h: objdump() prints "..." in place of chunk values */
int meta_only;
/* Operation decoded from the -x argument */
Mapop mop;
/* Input path/URL as produced by filenamefor() */
char infile[4096];
/* Map implementation chosen by implfor() from infile */
NCZM_IMPL impl;
/* Allocated by rootpathfor(); released at the end of main() */
char* rootpath;
/* Element type decoded from -t; main() defaults it to &types[0] */
const struct Type* nctype;
/* Bit flags collected from the -X argument */
int xflags;
/* -X m: keykind() reports a ".zmetadata" key as OK_IGNORE instead of OK_META */
# define XNOZMETADATA 1
} dumpoptions;
/* Forward declarations of static helpers that are defined further down in this file */
static int objdump(void);
static NCZM_IMPL implfor(const char* path);
static void printcontent(size64_t len, const char* content, OBJKIND kind);
static int breadthfirst(NCZMAP* map, const char*, NClist* stack);
static char* rootpathfor(const char* path);
static OBJKIND keykind(const char* key);
static void sortlist(NClist* l);
static const char* filenamefor(const char* f0);
/* Evaluate expr as a status code; a non-zero status is fatal (see nccheck). */
#define NCCHECK(expr) nccheck((expr),__LINE__)

/*
 * Abort the program when stat is non-zero.
 * Prints "<line>: <nc_strerror(stat)>" to stderr and exits with status 1;
 * returns normally when stat is zero.
 */
static void nccheck(int stat, int line)
{
    if(stat == 0)
        return;
    fprintf(stderr,"%d: %s\n",line,nc_strerror(stat));
    fflush(stderr);
    exit(1);
}
/*
 * Print the command line synopsis to stderr and exit with status 1.
 * The synopsis mirrors the getopt string used by main() ("dhvx:t:T:X:"):
 * -d, -h and -v are flags; -t, -x, -T and -X each take an argument.
 * Never returns.
 */
static void
zmapusage(void)
{
    fprintf(stderr,"usage: zmapio [-d][-h][-v][-t <type>][-x <op>][-T <tracelevel>][-X <flags>] <file>\n");
    exit(1);
}
static Mapop
decodeop(const char* name)
{
struct Mops* p = mapops;
for(;p->opname != NULL;p++) {
if(strcasecmp(p->opname,name)==0) return p->mapop;
}
return MOP_NONE;
}
static const struct Type*
decodetype(const char* name)
{
struct Type* p = types;
for(;p->typename != NULL;p++) {
if(strcasecmp(p->typename,name)==0) return p;
}
return NULL;
}
/*
 * Entry point: parse the command line into dumpoptions, turn the single
 * file argument into a URL, derive the map implementation and root path
 * from it, and run the selected operation.
 *
 * Options: -d (debug), -h (do not print chunk values), -v (print usage),
 *          -t <type>, -x <op>, -T <tracelevel>, -X <flags>.
 *
 * Returns 0 on success and 1 on failure.
 */
int
main(int argc, char** argv)
{
    int stat = NC_NOERR;
    int c;
    int n;
    char* p;
    char* unescaped = NULL;

    nc_initialize();
    memset((void*)&dumpoptions,0,sizeof(dumpoptions));

    while ((c = getopt(argc, argv, "dhvx:t:T:X:")) != EOF) {
        switch(c) {
        case 'd':
            dumpoptions.debug = 1;
            break;
        case 'h':
            dumpoptions.meta_only = 1;
            break;
        case 'v':
            zmapusage(); /* exits */
            goto done;
        case 't':
            dumpoptions.nctype = decodetype(optarg);
            if(dumpoptions.nctype == NULL) zmapusage();
            break;
        case 'x':
            dumpoptions.mop = decodeop(optarg);
            if(dumpoptions.mop == MOP_NONE) zmapusage();
            break;
        case 'T':
            nctracelevel(atoi(optarg));
            break;
        case 'X':
            /* Each character of the argument is a separate flag */
            for(p=optarg;*p;p++) {
                switch (*p) {
                case 'm': dumpoptions.xflags |= XNOZMETADATA; break;
                default: fprintf(stderr,"Unknown -X argument: %c\n",*p); break;
                }
            }
            break;
        case '?':
            fprintf(stderr,"unknown option\n");
            goto fail;
        default:
            break;
        }
    }

    /* Default the kind */
    if(dumpoptions.nctype == NULL) {
        dumpoptions.nctype = &types[0];
        fprintf(stderr,"Default type: %s\n",dumpoptions.nctype->typename);
    }

    /* get file argument */
    argc -= optind;
    argv += optind;

    if (argc > 1) {
        fprintf(stderr, "zmapio: only one input file argument permitted\n");
        goto fail;
    }
    if (argc == 0) {
        fprintf(stderr, "zmapio: no input file specified\n");
        goto fail;
    }

    /* Unescape the argument; it must be checked before it is handed on */
    unescaped = NC_shellUnescape(argv[0]);
    if(unescaped == NULL) {
        fprintf(stderr, "zmapio: cannot unescape file argument\n");
        goto fail;
    }
    /* Bounded copy: never write past the end of dumpoptions.infile */
    n = snprintf(dumpoptions.infile,sizeof(dumpoptions.infile),"%s",filenamefor(unescaped));
    nullfree(unescaped); unescaped = NULL;
    if(n < 0 || (size_t)n >= sizeof(dumpoptions.infile)) {
        fprintf(stderr, "zmapio: file argument too long\n");
        goto fail;
    }

    if((dumpoptions.impl = implfor(dumpoptions.infile))== NCZM_UNDEF)
        zmapusage();

    if((dumpoptions.rootpath = rootpathfor(dumpoptions.infile))== NULL)
        zmapusage();

    /* Only objdump is dispatched here; every other value also ends up in objdump */
    switch (dumpoptions.mop) {
    default:
        fprintf(stderr,"Default action: objdump\n");
        /* fall thru */
    case MOP_OBJDUMP:
        if((stat = objdump())) goto done;
        break;
    }

done:
    /* Reclaim dumpoptions */
    nullfree(dumpoptions.rootpath);
    nc_finalize();
    if(stat)
        fprintf(stderr,"fail: %s\n",nc_strerror(stat));
    return (stat ? 1 : 0);
fail:
    stat = NC_EINVAL;
    goto done;
}
/*
 * Work out which map implementation a URL asks for.
 * The "mode" fragment value is split on commas and the first element equal
 * to "file", "zip" or "s3" selects the implementation.
 * Returns NCZM_UNDEF when path does not parse as a URL, has no "mode"
 * fragment, or names none of the three implementations.
 */
static NCZM_IMPL
implfor(const char* path)
{
    NCZM_IMPL result = NCZM_UNDEF;
    NCURI* parsed = NULL;
    const char* modestr = NULL;
    NClist* modes = nclistnew();
    int k;

    ncuriparse(path,&parsed);
    if(parsed != NULL) {
        modestr = ncurifragmentlookup(parsed,"mode");
        if(modestr != NULL) {
            /* split on commas; a split failure is fatal */
            NCCHECK(nczm_split_delim(modestr,',',modes));
            for(k=0;k<nclistlength(modes);k++) {
                const char* tag = nclistget(modes,k);
                if(strcmp(tag,"file")==0) {result = NCZM_FILE; break;}
                if(strcmp(tag,"zip")==0) {result = NCZM_ZIP; break;}
                if(strcmp(tag,"s3")==0) {result = NCZM_S3; break;}
            }
        }
    }
    ncurifree(parsed);
    nclistfreeall(modes);
    return result;
}
/*
 * Compute the root key path for the dataset named by a URL, based on
 * dumpoptions.impl (which must already be set).
 *   file/zip: the constant "/".
 *   s3:       the URL path with its first segment (the bucket name) removed.
 * Returns a newly allocated string owned by the caller, or NULL when the
 * path does not parse as a URL, the implementation is not one of the three
 * above, or splitting/joining the path fails.
 */
static char*
rootpathfor(const char* path)
{
    int status = NC_NOERR;
    char* root = NULL;
    NCURI* parsed = NULL;
    NClist* parts = nclistnew();

    ncuriparse(path,&parsed);
    if(parsed != NULL) {
        if(dumpoptions.impl == NCZM_FILE || dumpoptions.impl == NCZM_ZIP) {
            root = strdup("/"); /*constant*/
        } else if(dumpoptions.impl == NCZM_S3) {
            /* Split the path part */
            status = nczm_split(parsed->path,parts);
            if(status == NC_NOERR) {
                /* remove the bucket name */
                char* bucket = (char*)nclistremove(parts,0);
                nullfree(bucket);
                /* Put it back together */
                status = nczm_join(parts,&root);
            }
        } else {
            status = NC_EINVAL;
        }
    }

    nclistfreeall(parts);
    ncurifree(parsed);
    if(status != NC_NOERR) {
        nullfree(root);
        root = NULL;
    }
    return root;
}
static int
objdump(void)
{
int stat = NC_NOERR;
NCZMAP* map = NULL;
NClist* stack = nclistnew();
char* obj = NULL;
char* content = NULL;
int depth;
if((stat=nczmap_open(dumpoptions.impl, dumpoptions.infile, NC_NOCLOBBER, 0, NULL, &map)))
goto done;
/* Depth first walk all the groups to get all keys */
if((stat = breadthfirst(map,"/",stack))) goto done;
if(dumpoptions.debug) {
int i;
fprintf(stderr,"stack:\n");
for(i=0;i<nclistlength(stack);i++)
fprintf(stderr,"[%d] %s\n",i,(char*)nclistget(stack,i));
}
for(depth=0;nclistlength(stack) > 0;depth++) {
size64_t len = 0;
OBJKIND kind = 0;
int hascontent = 0;
obj = nclistremove(stack,0); /* zero pos is always top of stack */
kind = keykind(obj);
/* Now print info for this obj key */
switch (stat=nczmap_len(map,obj,&len)) {
case NC_NOERR: hascontent = 1; break;
case NC_EEMPTY: /* fall thru */ /* this is not a content bearing key */
case NC_EACCESS: hascontent = 0; len = 0; stat = NC_NOERR; break;
default: goto done;
}
if(!hascontent) goto next; /* ignore it */
if(len > 0) {
size_t padlen = (len+dumpoptions.nctype->typesize);
content = calloc(1,padlen+1);
if((stat=nczmap_read(map,obj,0,len,content))) goto done;
content[len] = '\0';
} else {
content = NULL;
}
if(hascontent) {
if(len > 0) {
assert(content != NULL);
if(kind == OK_CHUNK) {
len = ceildiv(len,dumpoptions.nctype->typesize);
}
printf("[%d] %s : (%llu)",depth,obj,len);
if(kind == OK_CHUNK)
printf(" (%s)",dumpoptions.nctype->typename);
printf(" |");
switch(kind) {
case OK_GROUP:
case OK_META:
printcontent(len,content,kind);
break;
case OK_CHUNK:
if(dumpoptions.meta_only)
printf("...");
else
printcontent(len,content,kind);
break;
default: break;
}
printf("|\n");
} else {
printf("[%d] %s : (%llu) ||\n",depth,obj,len);
}
} else {
printf("[%d] %s\n",depth,obj);
}
nullfree(content); content = NULL;
next:
nullfree(obj); obj = NULL;
}
done:
nullfree(obj);
nullfree(content);
nczmap_close(map,0);
nclistfreeall(stack);
return stat;
}
/*
 * Recursive worker for breadthfirst().
 * Searches the map for the names directly below prefix, sorts them, and for
 * each one: appends it to prefix, pushes a copy of the resulting key onto
 * stack, and visits that key's own subtree before moving to the next name.
 * prefix is restored to its incoming length after each successful visit.
 * Returns NC_NOERR or the first error from the search or from a recursive call.
 */
static int
breadthfirstR(NCZMAP* map, NCbytes* prefix, NClist* stack)
{
    int status = NC_NOERR;
    NClist* children = nclistnew();
    const char* here = ncbytescontents(prefix);
    /* The root key "/" already ends in a separator */
    const int atroot = (here[0] == '/' && here[1] == '\0');
    size_t restore;

    status = nczmap_search(map,here,children);
    if(status == NC_NOERR) {
        sortlist(children);
        restore = ncbyteslength(prefix); /* save this position */
        while(nclistlength(children) > 0) {
            char* child = nclistremove(children,0);
            if(!atroot) ncbytescat(prefix,"/");
            ncbytescat(prefix,child);
            nullfree(child);
            nclistpush(stack,ncbytesdup(prefix));
            status = breadthfirstR(map,prefix,stack);
            if(status != NC_NOERR) break;
            ncbytessetlength(prefix,restore);
            ncbytesnull(prefix);
        }
    }
    nclistfreeall(children);
    return status;
}
/*
 * Collect onto stack every key found below key by walking the map with
 * breadthfirstR(). A NULL or empty key means "/"; a trailing '/' on a
 * longer key is dropped before the walk starts.
 * Returns the status of the walk.
 */
static int
breadthfirst(NCZMAP* map, const char* key, NClist* stack)
{
    int status;
    size_t keylen;
    NCbytes* start = ncbytesnew();

    if(key == NULL || *key == '\0')
        key = "/";
    keylen = strlen(key);
    ncbytescat(start,key);
    if(keylen > 1 && key[keylen-1] == '/') {
        /* remove trailing '/' */
        ncbytessetlength(start,ncbyteslength(start)-1);
        ncbytesnull(start);
    }
    status = breadthfirstR(map,start,stack);
    ncbytesfree(start);
    return status;
}
/* Hex digits indexed by nibble value (exactly 16 chars, no terminator) */
static const char hex[16] = "0123456789abcdef";

/*
 * Print len items of content to stdout, formatted according to kind:
 *   OK_CHUNK: len is the number of values (not bytes); each value is read
 *             as dumpoptions.nctype and the values are separated by ", ".
 *   OK_META:  each byte is printed as a character.
 *   others:   each byte is printed as two lowercase hex digits.
 * Aborts if dumpoptions.nctype names a type that is not handled here.
 * NOTE(review): the typed reads assume content is suitably aligned for the
 * element type; objdump() passes a calloc'ed buffer.
 */
static void
printcontent(size64_t len, const char* content, OBJKIND kind)
{
    size64_t i;
    unsigned int c0,c1;

    for(i=0;i<len;i++) {
        /* If kind is chunk, then len is # of values, not # of bytes */
        switch(kind) {
        case OK_CHUNK:
            if(i > 0) printf(", ");
            switch(dumpoptions.nctype->nctype) {
            /* signed char, not plain char: plain char may be unsigned */
            case NC_BYTE: printf("%d",((const signed char*)content)[i]); break;
            case NC_SHORT: printf("%d",((const short*)content)[i]); break;
            case NC_INT: printf("%d",((const int*)content)[i]); break;
            case NC_INT64: printf("%lld",((const long long*)content)[i]); break;
            case NC_UBYTE: printf("%u",((const unsigned char*)content)[i]); break;
            case NC_USHORT: printf("%u",((const unsigned short*)content)[i]); break;
            case NC_UINT: printf("%u",((const unsigned int*)content)[i]); break;
            case NC_UINT64: printf("%llu",((const unsigned long long*)content)[i]); break;
            case NC_FLOAT: printf("%f",((const float*)content)[i]); break;
            case NC_DOUBLE: printf("%lf",((const double*)content)[i]); break;
            default: abort();
            }
            break;
        case OK_META:
            printf("%c",content[i]);
            break;
        default:
            /* high nibble first, then low nibble */
            c1 = (unsigned char)(content[i]);
            c0 = c1 & 0xf;
            c1 = (c1 >> 4);
            c0 = hex[c0];
            c1 = hex[c1];
            printf("%c%c",(char)c1,(char)c0);
            break;
        }
    }
}
/* Characters scanned for in the last segment of a candidate chunk key */
static char chunkchars[] = ".0123456789";

/*
 * Classify a key by its last segment, as split off by nczm_divide_at(key,-1,...):
 *   segment not starting with '/'        -> OK_NONE
 *   "/." prefix                          -> OK_META, or OK_IGNORE for
 *                                           "/.zmetadata" when XNOZMETADATA is set
 *   segment ending in '/'                -> OK_GROUP
 *   anything else                        -> OK_CHUNK
 * Returns OK_NONE when the key cannot be divided or has no last segment.
 */
static OBJKIND
keykind(const char* key)
{
    OBJKIND result = OK_NONE;
    char* last = NULL;

    if(nczm_divide_at(key,-1,NULL,&last) == NC_NOERR && last != NULL) {
        if(last[0] != '/') {
            result = OK_NONE;
        } else if(last[1] == '.') {
            int hidden = (strcmp(last+1,".zmetadata")==0
                          && (dumpoptions.xflags & XNOZMETADATA));
            result = hidden ? OK_IGNORE : OK_META;
        } else if(last[strlen(last)-1] == '/') {
            result = OK_GROUP;
        } else {
            /* NOTE(review): where this scan stops is never consulted, so
               every remaining key is classified as a chunk - confirm intent */
            const char* q;
            for(q = last+1; *q != '\0' && strchr(chunkchars,*q) != NULL; q++)
                ;
            result = OK_CHUNK;
        }
    }
    nullfree(last);
    return result;
}
/*
 * Sort a list of C strings in place into ascending strcmp() order.
 * Bubble sort: adjacent out-of-order elements are exchanged, and full
 * passes are repeated until one completes without an exchange.
 * Lists with fewer than two elements are left untouched.
 */
static void
sortlist(NClist* l)
{
    size_t pos;
    int swapped;

    if(nclistlength(l) < 2) return;

    for(;;) {
        swapped = 0;
        for(pos = 0; pos+1 < nclistlength(l); pos++) {
            char* left = nclistget(l,pos);
            char* right = nclistget(l,pos+1);
            if(strcmp(left,right) <= 0)
                continue; /* already in order */
            nclistset(l,pos,right);
            nclistset(l,pos+1,left);
            swapped = 1;
        }
        if(!swapped) break;
    }
}
/* File extensions that cause a plain path to be rewritten as a URL; NULL terminated */
static const char* urlexts[] = {"nzf", "zip", "nz4", NULL};

/*
 * Convert a command line file argument into the name to open.
 *   - If f0 parses as a URL it is returned unchanged.
 *   - Otherwise, if the text after the last '.' is one of urlexts, the result
 *     is "file://<f0>#mode=nczarr,<ext>".
 *   - Otherwise f0 is returned unchanged.
 * A NULL f0 is treated as the empty string.
 *
 * The result lives in a static buffer that is overwritten by the next call.
 * All writes are bounded; if the name does not fit, an error is reported on
 * stderr and the program exits with status 1 rather than overrunning the
 * buffer or silently truncating the name.
 */
static const char*
filenamefor(const char* f0)
{
    static char result[4096];
    const char** extp;
    const char* ext;
    NCURI* uri = NULL;
    int toolong = 0;
    int n;

    if(f0 == NULL) f0 = "";

    n = snprintf(result,sizeof(result),"%s",f0); /* default */
    if(n < 0 || (size_t)n >= sizeof(result)) {toolong = 1; goto done;}

    ncuriparse(f0,&uri);
    if(uri == NULL) {
        /* Not a URL */
        ext = strrchr(f0,'.'); /* look at the extension, if any */
        if(ext == NULL) goto done; /* No extension */
        ext++;
        for(extp=urlexts;*extp;extp++) {
            if(strcmp(ext,*extp)==0) break;
        }
        if(*extp == NULL) goto done; /* not found */
        /* Assemble the url */
        n = snprintf(result,sizeof(result),"file://%s#mode=nczarr,%s",f0,*extp);
        if(n < 0 || (size_t)n >= sizeof(result)) toolong = 1;
    }
done:
    ncurifree(uri);
    if(toolong) {
        fprintf(stderr,"zmapio: file argument too long\n");
        exit(1);
    }
    return result;
}