netcdf-c/libdap4/d4util.c
Dennis Heimbigner 36102e3c32 Improve UTF8 Support On Windows
re: Issue https://github.com/Unidata/netcdf-c/issues/2190

The primary purpose of this PR is to improve the utf8 support
for windows. This is pursuant to a change in Windows that
supports utf8 natively (almost). The almost means that it is
still utf16 internally and the set of characters representable
by utf8 is larger than those representable by utf16.

This leaves open the question in the Issue about handling
the Windows 1252 character set.

This required the following changes:

1. Test the Windows build and major version in order to see if
   native utf8 is supported.
2. If native utf8 is supported, modify dpathmgr.c to call the 8-bit
   version of the windows fopen() and open() functions.
3. In support of this, programs that use XGetOpt (Windows versions)
   need to get the command line as utf8 and then parse to
   argc+argv as utf8. This requires using a homegrown command line parser
   named XCommandLineToArgvA.
4. Add a utility program called "acpget" that prints out the
   current Windows code page and locale.

Additionally, some technical debt was cleaned up as follows:

1. Unify all the places which attempt to read all or a part
   of a file into the dutil.c#NC_readfile code.
2. Similarly, unify all the code that creates temp files into
   dutil.c#NC_mktmp code.
3. Convert almost all remaining calls to fopen() and open()
   to NCfopen() and NCopen3(). This is to ensure that path management
   is used consistently. This touches a number of files.
4. extern->EXTERNL as needed to get it to work under Windows.
2022-02-08 20:53:30 -07:00

429 lines
9.7 KiB
C

/*********************************************************************
* Copyright 2018, UCAR/Unidata
* See netcdf/COPYRIGHT file for copying and redistribution conditions.
*********************************************************************/
#include "d4includes.h"
#ifdef HAVE_SYS_STAT_H
#include <sys/stat.h>
#endif
#ifdef HAVE_FCNTL_H
#include <fcntl.h>
#endif
#ifdef _MSC_VER
#include <io.h>
#endif
extern int mkstemp(char *template);
#define LBRACKET '['
#define RBRACKET ']'
/**************************************************/
/* Forward */
static char* backslashEscape(const char* s);
/**************************************************/
/**
 * Provide a hidden interface to allow utilities
 * to check if a given path name is really an ncdap4 url.
 *
 * @param path      candidate url; it is handed to ncuriparse()
 * @param basenamep if non-NULL and the parse succeeds, receives a heap
 *                  copy of the text after the last '/' of the url path
 *                  (or of the whole of path when the url path has no '/'),
 *                  with the final ".ext" removed; the caller must free it.
 *                  It is left untouched when the parse fails.
 * @return NC_NOERR if path parsed as a url, NC_EURL otherwise
 */
int
ncd4__testurl(const char* path, char** basenamep)
{
    /* Must start out NULL: the uri is released below even when the
       parse fails, and a failed parse is not known to set it. */
    NCURI* uri = NULL;
    int ok = NC_NOERR;

    if(ncuriparse(path,&uri))
        ok = NC_EURL;
    else {
        char* slash = (uri->path == NULL ? NULL : strrchr(uri->path, '/'));
        char* dot;
        if(slash == NULL) slash = (char*)path; else slash++;
        slash = nulldup(slash);
        if(slash == NULL)
            dot = NULL;
        else
            dot = strrchr(slash, '.');
        /* A leading '.' is part of the name, not the start of an extension */
        if(dot != NULL && dot != slash) *dot = '\0';
        if(basenamep)
            *basenamep=slash;
        else if(slash)
            free(slash);
    }
    if(uri != NULL)
        ncurifree(uri);
    return ok;
}
/* Report the host byte order: 1 when the lowest-addressed byte of an
   int holding the value 1 is itself 1 (little endian), otherwise 0. */
int
NCD4_isLittleEndian(void)
{
    const int probe = 1;
    /* Inspecting an object through unsigned char is always permitted */
    const unsigned char* first = (const unsigned char*)&probe;

    if(*first == 1)
        return 1;
    return 0;
}
/* Size in bytes of the in-memory representation of an atomic type.
   Any type id not listed below (opaque included) yields 0. */
size_t
NCD4_typesize(nc_type tid)
{
    size_t size = 0;

    switch(tid) {
    case NC_BYTE:
    case NC_UBYTE:
    case NC_CHAR:
        size = 1;
        break;
    case NC_SHORT:
    case NC_USHORT:
        size = sizeof(short);
        break;
    case NC_INT:
    case NC_UINT:
        size = sizeof(int);
        break;
    case NC_FLOAT:
        size = sizeof(float);
        break;
    case NC_DOUBLE:
        size = sizeof(double);
        break;
    case NC_INT64:
    case NC_UINT64:
        size = sizeof(long long);
        break;
    case NC_STRING:
        /* strings are held as pointers */
        size = sizeof(char*);
        break;
    default:
        break;
    }
    return size;
}
/* Multiply together the sizes of every dimension listed in node->dims.
   The product of an empty dimension list is 1. */
d4size_t
NCD4_dimproduct(NCD4node* node)
{
    d4size_t total = 1;
    size_t ndims = nclistlength(node->dims);
    size_t k = 0;

    while(k < ndims) {
        NCD4node* d = (NCD4node*)nclistget(node->dims,k);
        total = total * d->dim.size;
        k++;
    }
    return total;
}
/**
 * Build the fully qualified name (FQN) of a node.
 *
 * The container chain of the node is walked up to the top. Every
 * leading group after the first element contributes "/" followed by its
 * backslash-escaped name; if the chain then continues with a non-group
 * element, "/" plus the result of NCD4_makeName(last,".") is appended.
 *
 * @param node node whose FQN is wanted
 * @return heap-allocated FQN, or NULL if an allocation failed.
 *         Caller must free return value.
 */
char*
NCD4_makeFQN(NCD4node* node)
{
    char* fqn = NULL;
    char* escaped = NULL;
    size_t i;
    NCD4node* g = NULL;
    NClist* path = nclistnew();
    size_t estimate = 0;

    if(path == NULL) return NULL;

    /* Collect the chain topmost-first while totalling the name lengths */
    for(g=node;g != NULL;g=g->container) {
        estimate += strlen(g->name);
        nclistinsert(path,0,g);
    }
    /* Escaping can double each name; allow 2 more chars per element for separators */
    estimate = (estimate*2) + 2*nclistlength(path);
    estimate++; /*strlcat nul*/
    fqn = (char*)malloc(estimate+1);
    if(fqn == NULL) goto done;
    fqn[0] = '\0';
    /* Create the group-based fqn prefix */
    /* start at 1 to avoid dataset */
    for(i=1;i<nclistlength(path);i++) {
        NCD4node* elem = (NCD4node*)nclistget(path,i);
        if(elem->sort != NCD4_GROUP) break;
        /* Add in the group name */
        escaped = backslashEscape(elem->name);
        if(escaped == NULL) {free(fqn); fqn = NULL; goto done;}
        strlcat(fqn,"/",estimate);
        strlcat(fqn,escaped,estimate);
        free(escaped);
    }
    /* Add in the final name part (if not group) */
    if(i < nclistlength(path)) {
        size_t last = nclistlength(path)-1;
        NCD4node* n = (NCD4node*)nclistget(path,last);
        char* name = NCD4_makeName(n,".");
        /* NCD4_makeName reports allocation failure with NULL; do not
           hand that to strlcat */
        if(name == NULL) {free(fqn); fqn = NULL; goto done;}
        strlcat(fqn,"/",estimate);
        strlcat(fqn,name,estimate);
        free(name);
    }
done:
    nclistfree(path);
    return fqn;
}
/**
 * Create the last part of the fqn (post groups).
 *
 * Starting at elem and following the container links, every node up to
 * but not including the first one whose sort is NCD4_GROUP is collected.
 * Their backslash-escaped names are then joined outermost-first with sep
 * between consecutive names.
 *
 * @param elem innermost node of the name
 * @param sep  separator placed between names; NULL is treated as ""
 * @return heap-allocated name that the caller must free, or NULL if an
 *         allocation failed
 */
char*
NCD4_makeName(NCD4node* elem, const char* sep)
{
    size_t i;
    size_t estimate = 0;
    /* Size the buffer from the real separator length so that a
       multi-character separator is not silently truncated */
    size_t seplen = (sep == NULL ? 0 : strlen(sep));
    NCD4node* n;
    NClist* path = nclistnew();
    char* fqn = NULL;

    if(path == NULL) return NULL;

    /* Collect the path up to, but not including, the first containing group.
       Also stop if the chain ends without ever reaching a group. */
    for(n=elem;n != NULL && n->sort != NCD4_GROUP;n=n->container) {
        nclistinsert(path,0,n);
        /* worst case: every character of the name gets escaped */
        estimate += (seplen+(2*strlen(n->name)));
    }
    estimate++; /*strlcat nul*/
    fqn = (char*)malloc(estimate+1);
    if(fqn == NULL) goto done;
    fqn[0] = '\0';

    for(i=0;i<nclistlength(path);i++) {
        NCD4node* part = (NCD4node*)nclistget(path,i);
        char* escaped = backslashEscape(part->name);
        if(escaped == NULL) {free(fqn); fqn = NULL; goto done;}
        if(i > 0 && seplen > 0)
            strlcat(fqn,sep,estimate);
        strlcat(fqn,escaped,estimate);
        free(escaped);
    }
done:
    nclistfree(path);
    return fqn;
}
/**
 * Return a copy of s in which each character that is special inside an
 * FQN (backslash, '/', '.' and '@') is preceded by a backslash. All
 * other characters are copied unchanged.
 *
 * The special character itself must follow the backslash; otherwise
 * the name cannot be recovered by NCD4_deescape.
 *
 * @param s nul-terminated string to escape; must not be NULL
 * @return heap-allocated escaped string that the caller must free,
 *         or NULL if the allocation failed
 */
static char*
backslashEscape(const char* s)
{
    const char* p;
    char* q;
    char* escaped = NULL;

    escaped = (char*)malloc(1+(2*strlen(s))); /* max is every char is escaped */
    if(escaped == NULL) return NULL;
    for(p=s,q=escaped;*p;p++) {
        char c = *p;
        if(c == '\\' || c == '/' || c == '.' || c == '@')
            *q++ = '\\'; /* the escape marker */
        *q++ = c;        /* then always the character itself */
    }
    *q = '\0';
    return escaped;
}
/**
 * Parse an fqn into a sequence of names;
 * using '/', and then (conditionally, when ALLOWFIELDMAPS is defined) '.'
 *
 * A private copy of the fqn (minus one leading '/') is cut in place at
 * every unescaped separator. A copy of "/" is pushed onto pieces first,
 * followed by one de-escaped heap copy per piece.
 *
 * @param fqn0   fqn to parse; NULL is treated as "/"
 * @param pieces list that receives the heap-allocated names
 * @return the value of THROW(NC_NOERR)
 *
 * NOTE(review): failures of strdup() and NCD4_deescape() are not
 * detected here; a NULL from NCD4_deescape() is pushed as is.
 */
int
NCD4_parseFQN(const char* fqn0, NClist* pieces)
{
    int ret = NC_NOERR;
    int count;
    char* p;
    char* start;
    char* fqn = NULL;

    if(fqn0 == NULL) fqn0 = "/";
    fqn = strdup(fqn0[0] == '/' ? fqn0+1 : fqn0);
    start = fqn;
    /* Step 0: insert rootname */
    nclistpush(pieces,strdup("/"));
    /* Step 1: Break fqn into pieces at occurrences of '/' */
    count = 0;
    for(p=start;*p;) {
        switch(*p) {
        case '\\': /* leave the escapes in place */
            /* Skip the escaped character too, but never step over the
               terminating nul when the backslash is the last character */
            p += (p[1] == '\0' ? 1 : 2);
            break;
        case '/': /*capture the piece name */
            *p++ = '\0';
            start = p; /* mark start of the next part */
            count++;
            break;
        default: /* ordinary char */
            p++;
            break;
        }
    }
#ifdef ALLOWFIELDMAPS
    /* Step 2, walk the final piece to break up based on '.' */
    for(p=start;*p;) {
        switch(*p) {
        case '\\': /* leave the escapes in place */
            /* same guard as in step 1 for a trailing backslash */
            p += (p[1] == '\0' ? 1 : 2);
            break;
        case '.': /*capture the piece name */
            *p++ = '\0';
            start = p;
            count++;
            break;
        default: /* ordinary char */
            p++;
            break;
        }
    }
#endif
    count++; /* acct for last piece */
    /* Step 3: capture and de-escape the pieces */
    for(p=fqn;count > 0;count--) {
        char* descaped = NCD4_deescape(p);
        nclistpush(pieces,descaped);
        p = p + strlen(p) + 1; /* skip past the terminating nul */
    }
    free(fqn);
    return THROW(ret);
}
/**
 * Return a copy of esc with backslash escapes removed: each backslash
 * is dropped and the character after it is copied literally. A lone
 * backslash at the very end of the string is dropped.
 *
 * @param esc escaped string, may be NULL
 * @return heap-allocated string that the caller must free; NULL if esc
 *         is NULL or the allocation failed
 */
char*
NCD4_deescape(const char* esc)
{
    size_t len;
    char* s;
    const char* p;
    char* q;

    if(esc == NULL) return NULL;
    len = strlen(esc);
    s = (char*)malloc(len+1); /* the result is never longer than the input */
    if(s == NULL) return NULL;
    for(p=esc,q=s;*p;) {
        if(*p == '\\') {
            p++; /* drop the escape marker */
            /* Trailing backslash: nothing follows it, so stop here
               rather than stepping past the terminating nul */
            if(*p == '\0') break;
        }
        *q++ = *p++;
    }
    *q = '\0';
    return s;
}
/**
 * Return a copy of s in which the characters & < > " ' are replaced by
 * the entities &amp; &lt; &gt; &quot; &apos;. All other characters are
 * copied unchanged.
 *
 * @param s string to escape, may be NULL
 * @return heap-allocated escaped string that the caller must free;
 *         NULL if s is NULL or the allocation failed
 */
char*
NCD4_entityescape(const char* s)
{
    const char* p;
    char* q;
    char* escaped = NULL;

    /* Same NULL-in, NULL-out contract as NCD4_deescape */
    if(s == NULL) return NULL;
    escaped = (char*)malloc(1+(6*strlen(s))); /* 6 = |&apos;|, the longest entity */
    if(escaped == NULL) return NULL;
    for(p=s,q=escaped;*p;p++) {
        const char* entity;
        char c = *p;
        switch (c) {
        case '&':  entity = "&amp;"; break;
        case '<':  entity = "&lt;"; break;
        case '>':  entity = "&gt;"; break;
        case '"':  entity = "&quot;"; break;
        case '\'': entity = "&apos;"; break;
        default :  entity = NULL; break;
        }
        if(entity == NULL)
            *q++ = c;
        else {
            size_t elen = strlen(entity);
            memcpy(q,entity,elen);
            q+=elen;
        }
    }
    *q = '\0';
    return escaped;
}
/* Squeeze every nul byte out of the first slen bytes of s, in place
   (used on XML documents as a precaution).
   Returns the number of bytes kept. When at least one nul was dropped,
   a terminating nul is stored right after the kept bytes; otherwise
   nothing beyond the copied bytes is written. */
size_t
NCD4_elidenuls(char* s, size_t slen)
{
    size_t kept = 0; /* write position == number of bytes retained */
    size_t pos = 0;  /* read position */

    while(pos < slen) {
        char ch = s[pos++];
        if(ch == '\0')
            continue;
        s[kept++] = ch;
    }
    /* kept < slen exactly when something was removed */
    if(kept < slen)
        s[kept] = '\0';
    return kept;
}
/* Write "host" or "host:port" taken from uri into space, a buffer of
   len bytes. Nothing is done when space is NULL or len is 0; space is
   set to the empty string when the uri has no host. */
void
NCD4_hostport(NCURI* uri, char* space, size_t len)
{
    if(space == NULL || len == 0)
        return;

    space[0] = '\0'; /* empty start so everything can be appended with strlcat */
    if(uri->host == NULL)
        return;
    strlcat(space,uri->host,len);

    if(uri->port == NULL)
        return;
    strlcat(space,":",len);
    strlcat(space,uri->port,len);
}
/* Write "user:password" taken from uri into space, a buffer of len
   bytes. Nothing is done when space is NULL or len is 0; space is set
   to the empty string unless both user and password are present. */
void
NCD4_userpwd(NCURI* uri, char* space, size_t len)
{
    if(space == NULL || len == 0)
        return;

    space[0] = '\0'; /* empty start so everything can be appended with strlcat */
    if(uri->user == NULL || uri->password == NULL)
        return;

    strlcat(space,uri->user,len);
    strlcat(space,":",len);
    strlcat(space,uri->password,len);
}
/**************************************************/
/* Error reporting */
/**
 * Report an error: print "(file:line) " on stderr, then pass the
 * printf-style message to ncvlog() at level NCLOGERR.
 *
 * @param code value handed back to the caller unchanged
 * @param line source line to show in the prefix
 * @param file source file to show in the prefix
 * @param fmt  printf-style format for the message, followed by its arguments
 * @return code
 */
int
NCD4_error(int code, const int line, const char* file, const char* fmt, ...)
{
    va_list argv;

    fprintf(stderr,"(%s:%d) ",file,line);
    va_start(argv,fmt);
    ncvlog(NCLOGERR,fmt,argv);
    va_end(argv); /* every va_start must be paired with va_end before returning */
    return code;
}
/**
 * Report the error whose message is nc_strerror(code), via NCD4_error.
 *
 * @param code netcdf status to describe; also the return value
 * @param line source line to show in the report
 * @param file source file to show in the report
 * @return the value returned by NCD4_error, i.e. code
 */
int
NCD4_errorNC(int code, const int line, const char* file)
{
    /* Pass the message as data, not as the format, so that a '%' in
       the text can never be taken for a conversion specification */
    return NCD4_error(code,line,file,"%s",nc_strerror(code));
}
d4size_t
NCD4_getcounter(void* p)
{
COUNTERTYPE v;
memcpy(&v,p,sizeof(v));
return (d4size_t)v;
}
/**
 * Decode the 4-byte on-the-wire chunk header found at p.
 *
 * The first byte holds the flags (masked with NCD4_ALL_CHUNK_FLAGS);
 * with that byte zeroed, the four bytes are taken as a count in
 * network (big endian) order.
 *
 * @param p                start of the header
 * @param hdr              receives the decoded flags and count
 * @param hostlittleendian non-zero if the bytes must be swapped to
 *                         match the host byte order
 * @return p advanced past the 4 header bytes
 */
void*
NCD4_getheader(void* p, NCD4HDR* hdr, int hostlittleendian)
{
    unsigned char bytes[4];
    unsigned int count = 0;

    memcpy(bytes,p,sizeof(bytes));
    p = INCR(p,4); /* on-the-wire hdr is 4 bytes */
    /* assume header is network (big) order */
    hdr->flags = bytes[0]; /* big endian => flags are in byte 0 */
    hdr->flags &= NCD4_ALL_CHUNK_FLAGS; /* Ignore extraneous flags */
    bytes[0] = 0; /* so we can do byte swap to get count */
    if(hostlittleendian)
        swapinline32(bytes); /* host is little endian */
    /* Copy the bytes out instead of reading the char array through an
       unsigned int pointer, which is neither alignment- nor aliasing-safe.
       NOTE(review): as before, this assumes unsigned int is 4 bytes wide. */
    memcpy(&count,bytes,sizeof(bytes));
    hdr->count = count; /* get count */
    return p;
}
/**
 * Print a "***FAIL" line on stderr giving the url of state together
 * with the http code and error message recorded in its metadata.
 * Nothing is printed when state has no metadata.
 *
 * @param state connection state whose last error is reported
 */
void
NCD4_reporterror(NCD4INFO* state)
{
    NCD4meta* meta = state->substrate.metadata;
    char* u = NULL;

    if(meta == NULL) return;
    u = ncuribuild(state->uri,NULL,NULL,NCURIALL);
    /* Passing NULL for a %s argument is undefined; print an empty
       string in place of a missing url or message */
    fprintf(stderr,"***FAIL: url=%s httpcode=%d errmsg->\n%s\n",
            (u == NULL ? "" : u),
            meta->error.httpcode,
            (meta->error.message == NULL ? "" : meta->error.message));
    /* ncuribuild hands back a heap string owned by the caller */
    free(u);
}