mirror of
https://github.com/Unidata/netcdf-c.git
synced 2024-12-03 08:01:25 +08:00
582 lines
14 KiB
C
582 lines
14 KiB
C
/*********************************************************************
|
|
* Copyright 1993, UCAR/Unidata
|
|
* See netcdf/COPYRIGHT file for copying and redistribution conditions.
|
|
* $Header: /upc/share/CVS/netcdf-3/ncgen/escapes.c,v 1.5 2010/04/04 19:39:44 dmh Exp $
|
|
*********************************************************************/
|
|
|
|
#include "includes.h"
|
|
#include "ConvertUTF.h"
|
|
|
|
/* Forward*/
|
|
static void initdecodify(void);
|
|
|
|
/*
|
|
* "Expands" valid escape sequences in yystring (read by lex) into the
|
|
* apropriate characters in termstring. For example, the two character
|
|
* sequence "\t" in yystring would be converted into a single tab character
|
|
* in termstring. On return, termstring is properly terminated.
|
|
*/
|
|
|
|
void
|
|
expand_escapes(
|
|
char *termstring, /* returned, with escapes expanded */
|
|
char *yytext,
|
|
int yyleng)
|
|
{
|
|
char *s, *t, *endp;
|
|
|
|
yytext[yyleng-1]='\0'; /* don't copy quotes */
|
|
/* expand "\" escapes, e.g. "\t" to tab character */
|
|
s = termstring;
|
|
t = yytext+1;
|
|
while(*t) {
|
|
if (*t == '\\') {
|
|
t++;
|
|
switch (*t) {
|
|
case 'a':
|
|
*s++ = '\007'; t++; /* will use '\a' when STDC */
|
|
break;
|
|
case 'b':
|
|
*s++ = '\b'; t++;
|
|
break;
|
|
case 'f':
|
|
*s++ = '\f'; t++;
|
|
break;
|
|
case 'n':
|
|
*s++ = '\n'; t++;
|
|
break;
|
|
case 'r':
|
|
*s++ = '\r'; t++;
|
|
break;
|
|
case 't':
|
|
*s++ = '\t'; t++;
|
|
break;
|
|
case 'v':
|
|
*s++ = '\v'; t++;
|
|
break;
|
|
case '\\':
|
|
*s++ = '\\'; t++;
|
|
break;
|
|
case '?':
|
|
*s++ = '\177'; t++;
|
|
break;
|
|
case '\'':
|
|
*s++ = '\''; t++;
|
|
break;
|
|
case '\"':
|
|
*s++ = '\"'; t++;
|
|
break;
|
|
case 'x':
|
|
t++; /* now t points to one or more hex digits */
|
|
*s++ = (char) strtol(t, &endp, 16);
|
|
t = endp;
|
|
break;
|
|
case '0':
|
|
case '1':
|
|
case '2':
|
|
case '3':
|
|
case '4':
|
|
case '5':
|
|
case '6':
|
|
case '7':
|
|
/* t now points to octal digits */
|
|
*s++ = (char) strtol(t, &endp, 8);
|
|
t = endp;
|
|
break;
|
|
default:
|
|
*s++ = *t++;
|
|
break;
|
|
}
|
|
} else {
|
|
*s++ = *t++;
|
|
}
|
|
}
|
|
*s = '\0';
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* Replace escaped chars in CDL representation of name such as
|
|
* 'abc\:def\ gh\\i' with unescaped version, such as 'abc:def gh\i'.
|
|
*/
|
|
/* ?? This seems redundant over expand_escapes*/
|
|
void
|
|
deescapify(char *name)
|
|
{
|
|
const char *cp = name;
|
|
char *sp;
|
|
size_t len = strlen(name);
|
|
char *newname;
|
|
|
|
if(strchr(name, '\\') == NULL)
|
|
return;
|
|
|
|
newname = (char *) emalloc(len + 1);
|
|
cp = name;
|
|
sp = newname;
|
|
while(*cp != '\0') { /* delete '\' chars, except change '\\' to '\' */
|
|
switch (*cp) {
|
|
case '\\':
|
|
if(*(cp+1) == '\\') {
|
|
*sp++ = '\\';
|
|
cp++;
|
|
}
|
|
break;
|
|
default:
|
|
*sp++ = *cp;
|
|
break;
|
|
}
|
|
cp++;
|
|
}
|
|
*sp = '\0';
|
|
/* ASSERT(strlen(newname) <= strlen(name)); */
|
|
strncpy(name, newname, len+1); /* watch out for trailing null*/
|
|
efree(newname);
|
|
return;
|
|
}
|
|
|
|
/*
|
|
Given a character c, fill s with the character suitably escaped.
|
|
E.g. c = '\t' => s="\t"
|
|
Caller must ensure enough space
|
|
Currently does not handle unicode
|
|
Returns s as it result.
|
|
*/
|
|
|
|
char*
|
|
escapifychar(unsigned int c, char* s0, int quote)
|
|
{
|
|
char* s = s0;
|
|
if(c == '\\') {
|
|
*s++ = '\\'; *s++='\\';
|
|
} else if(c == quote) {
|
|
*s++ = '\\'; *s++=(char)quote;
|
|
} else if(c >= ' ' && c != '\177') {
|
|
*s++ = (char)c;
|
|
} else {
|
|
switch (c) {
|
|
case '\b': strcpy(s,"\\b"); s+=2; break;
|
|
case '\f': strcpy(s,"\\f"); s+=2; break;
|
|
case '\n': strcpy(s,"\\n"); s+=2; break;
|
|
case '\r': strcpy(s,"\\r"); s+=2; break;
|
|
case '\t': strcpy(s,"\\t"); s+=2; break;
|
|
case '\v': strcpy(s,"\\v"); s+=2; break;
|
|
default: {
|
|
unsigned int oct1 = (c & 007);
|
|
unsigned int oct2 = ((c >> 3) & 007);
|
|
unsigned int oct3 = ((c >> 6) & 003);
|
|
*s++ = '\\';
|
|
*s++ = oct3 + '0';
|
|
*s++ = oct2 + '0';
|
|
*s++ = oct1 + '0';
|
|
} break;
|
|
}
|
|
}
|
|
*s = '\0';
|
|
return s0;
|
|
}
|
|
|
|
/* Return a pool string that is s0 with all characters*/
|
|
/* ecaped that require it. The resulting string is not*/
|
|
/* surrounded by quotes.*/
|
|
/* Since the string might actually contain nulls, specify the length.*/
|
|
|
|
char*
|
|
escapify(char* s0, int quote, size_t len)
|
|
{
|
|
int i;
|
|
char* result;
|
|
result = poolalloc(1+4*len); /* overkill to support maximal expansion*/
|
|
result[0] = '\0';
|
|
for(i=0;i<len;i++) {
|
|
char tmp[8];
|
|
escapifychar((unsigned int)s0[i],tmp,quote);
|
|
strcat(result,tmp);
|
|
}
|
|
return result;
|
|
}
|
|
|
|
char*
|
|
escapifyname(char* s0)
|
|
{
|
|
return escapify(s0,'"',strlen(s0));
|
|
}
|
|
|
|
void
|
|
cquotestring(Bytebuffer* databuf)
|
|
{
|
|
char* escaped = escapify(bbContents(databuf),'"',bbLength(databuf));
|
|
bbClear(databuf);
|
|
bbAppend(databuf,'"');
|
|
bbCat(databuf,escaped);
|
|
bbAppend(databuf,'"');
|
|
}
|
|
|
|
/*
|
|
* Replace special chars in name so it can be used in C and Fortran
|
|
* variable names without causing syntax errors. Here we just replace
|
|
* each "-" in a name with "_MINUS_", each "." with "_PERIOD_", etc.
|
|
* For bytes with high bit set, from UTF-8 encoding of Unicode, just
|
|
* replace with "_xHH", where each H is the appropriate hex digit.
|
|
* However, if the utf flag is set, then just pass utf characters as is.
|
|
* If a name begins with a number N, such as "4LFTX", replace with
|
|
* "DIGIT_N_", such as "DIGIT_4_LFTX".
|
|
* Note that apparently, FORTRAN will not allow a leading underscore,
|
|
* so remove if we are doing fortran.
|
|
*
|
|
* It is required that decodify be idempotent:
|
|
* i.e. decodify(decodify(s)) == decodify(s)
|
|
*
|
|
* Returned name is pool alloc'd so is transient
|
|
*/
|
|
|
|
static int init = 0;
|
|
static char* repls[256]; /* replacement string for each char */
|
|
static int lens[256]; /* lengths of replacement strings */
|
|
static struct {
|
|
char c;
|
|
char *s;
|
|
} ctable[] = {
|
|
{' ', "_SPACE_"},
|
|
{'!', "_EXCLAMATION_"},
|
|
{'"', "_QUOTATION_"},
|
|
{'#', "_HASH_"},
|
|
{'$', "_DOLLAR_"},
|
|
{'%', "_PERCENT_"},
|
|
{'&', "_AMPERSAND_"},
|
|
{'\'', "_APOSTROPHE_"},
|
|
{'(', "_LEFTPAREN_"},
|
|
{')', "_RIGHTPAREN_"},
|
|
{'*', "_ASTERISK_"},
|
|
{'+', "_PLUS_"},
|
|
{',', "_COMMA_"},
|
|
{'-', "_MINUS_"},
|
|
{'.', "_PERIOD_"},
|
|
{':', "_COLON_"},
|
|
{';', "_SEMICOLON_"},
|
|
{'<', "_LESSTHAN_"},
|
|
{'=', "_EQUALS_"},
|
|
{'>', "_GREATERTHAN_"},
|
|
{'?', "_QUESTION_"},
|
|
{'@', "_ATSIGN_"},
|
|
{'[', "_LEFTBRACKET_"},
|
|
{'\\', "_BACKSLASH_"},
|
|
{']', "_RIGHTBRACKET_"},
|
|
{'^', "_CIRCUMFLEX_"},
|
|
{'`', "_BACKQUOTE_"},
|
|
{'{', "_LEFTCURLY_"},
|
|
{'|', "_VERTICALBAR_"},
|
|
{'}', "_RIGHTCURLY_"},
|
|
{'~', "_TILDE_"},
|
|
{'/', "_SLASH_"} /* should not occur in names */
|
|
/* {'_', "_UNDERSCORE_"} */
|
|
};
|
|
static int idtlen;
|
|
static int hexlen;
|
|
static Bytebuffer* newname;
|
|
|
|
static void
|
|
initdecodify(void)
|
|
{
|
|
int nctable = (sizeof(ctable))/(sizeof(ctable[0]));
|
|
int i;
|
|
char *rp;
|
|
|
|
newname = bbNew();
|
|
idtlen = strlen("DIGIT_n_"); /* initial digit template */
|
|
hexlen = strlen("_XHH"); /* template for hex of non-ASCII bytes */
|
|
for(i = 0; i < 128; i++) {
|
|
rp = emalloc(2);
|
|
rp[0] = i;
|
|
rp[1] = '\0';
|
|
repls[i] = rp;
|
|
}
|
|
for(i=0; i < nctable; i++) {
|
|
size_t j = ctable[i].c;
|
|
efree(repls[j]);
|
|
repls[j] = ctable[i].s;
|
|
}
|
|
for(i = 128; i < 256; i++) {
|
|
rp = emalloc(hexlen+1);
|
|
snprintf(rp, hexlen+1, "_X%2.2X", i); /* need to include null*/
|
|
rp[hexlen] = '\0';
|
|
repls[i] = rp;
|
|
}
|
|
for(i = 0; i < 256; i++) {
|
|
lens[i] = strlen(repls[i]);
|
|
}
|
|
init = 1; /* only do this initialization once */
|
|
}
|
|
|
|
char*
|
|
decodify(const char *name0)
|
|
{
|
|
const unsigned char *cp;
|
|
unsigned int c;
|
|
char* name;
|
|
|
|
if(init == 0) initdecodify();
|
|
bbClear(newname);
|
|
cp = (const unsigned char*) name0;
|
|
if('0' <= *cp && *cp <= '9') { /* handle initial digit, if any */
|
|
char tmp[16];
|
|
snprintf(tmp,sizeof(tmp),"DIGIT_%c_", *cp);
|
|
bbCat(newname,tmp);
|
|
cp++;
|
|
}
|
|
while((c=*cp++)) { /* copy name to newname, replacing special chars */
|
|
ASSERT(c <= 256);
|
|
bbCat(newname,repls[c]);
|
|
}
|
|
/* If FORTRAN, remove leading _, if any */
|
|
name = bbContents(newname);
|
|
if(bbGet(newname,0) == '_') name++;
|
|
return pooldup(name);
|
|
}
|
|
|
|
/**************************************************/
|
|
/* CML String Escapes */
|
|
/**************************************************/
|
|
|
|
/*
|
|
Given a character c, fill s with the character suitably escaped
|
|
for use with xml.
|
|
Caller must ensure enough space
|
|
Currently does not handle unicode
|
|
Returns s as it result.
|
|
*/
|
|
|
|
static char hexdigits[] = "0123456789ABCDEF";
|
|
|
|
static char printescapable[] = "\"&<>";
|
|
static char* printescape[] = {"quot", "amp", "lt", "gt"};
|
|
|
|
static void
|
|
xescapifychar(unsigned int c, int quote, Bytebuffer* s)
|
|
{
|
|
if(c >= ' ' && c < '\177') {
|
|
char* p;
|
|
char** q;
|
|
for(p=printescapable,q=printescape;*p;p++,q++) {if(c==*p) break;}
|
|
if(*p) {
|
|
bbAppend(s,'&');
|
|
bbCat(s,*q);
|
|
bbAppend(s,';');
|
|
} else
|
|
bbAppend(s,(char)c);
|
|
} else {
|
|
/* Do hex escape */
|
|
unsigned int hex1 = (c & 0x0f);
|
|
unsigned int hex2 = ((c >> 4) & 0x0f);
|
|
bbCat(s,"&#");
|
|
bbAppend(s,hexdigits[hex2]);
|
|
bbAppend(s,hexdigits[hex1]);
|
|
bbAppend(s,';');
|
|
}
|
|
}
|
|
|
|
/* Return a pool string that is s0 with all characters
|
|
ecaped that require it. The resulting string is not
|
|
surrounded by quotes.
|
|
Since the string might actually contain nulls, specify the length.
|
|
*/
|
|
|
|
char*
|
|
xescapify(char* s0, int quote, size_t len)
|
|
{
|
|
int i;
|
|
char* result;
|
|
Bytebuffer* escaped = bbNew();
|
|
for(i=0;i<len;i++) {
|
|
xescapifychar((unsigned int)s0[i],quote,escaped);
|
|
}
|
|
result = pooldup(bbContents(escaped));
|
|
bbFree(escaped);
|
|
return result;
|
|
}
|
|
|
|
/**************************************************/
|
|
/* Java String Escapes */
|
|
/**************************************************/
|
|
|
|
/*
|
|
Given a utf16 character c,
|
|
fill s with the characters needed
|
|
to suitably escape c for use with Java.
|
|
*/
|
|
|
|
static void
|
|
jescapifychar(UTF16 c, int quote, Bytebuffer* s)
|
|
{
|
|
/* Separate out ascii from UTF16 */
|
|
if(c <= '\177') {
|
|
/* Separate printables from controls */
|
|
if(c >= ' ' && c < '\177') {
|
|
if (c == quote) {
|
|
bbAppend(s,'\\');
|
|
}
|
|
bbAppend(s,(char)c);
|
|
} else switch (c) {
|
|
case '\t': bbCat(s,"\\t"); break;
|
|
case '\b': bbCat(s,"\\b"); break;
|
|
case '\n': bbCat(s,"\\n"); break;
|
|
case '\r': bbCat(s,"\\r"); break;
|
|
case '\f': bbCat(s,"\\f"); break;
|
|
default:
|
|
{ /* Do hex escape */
|
|
int hex1 = (c & 0x0f);
|
|
int hex2 = ((c >> 4) & 0x0f);
|
|
int hex3 = ((c >> 8) & 0x0f);
|
|
int hex4 = ((c >> 12) & 0x0f);
|
|
bbAppend(s,'\\');
|
|
bbAppend(s,'u');
|
|
bbAppend(s,hexdigits[hex4]);
|
|
bbAppend(s,hexdigits[hex3]);
|
|
bbAppend(s,hexdigits[hex2]);
|
|
bbAppend(s,hexdigits[hex1]);
|
|
} break;
|
|
}
|
|
} else { /* Do \uxxxx escapes */
|
|
/* Do hex escape */
|
|
int hex1 = (c & 0x0f);
|
|
int hex2 = ((c >> 4) & 0x0f);
|
|
int hex3 = ((c >> 8) & 0x0f);
|
|
int hex4 = ((c >> 12) & 0x0f);
|
|
bbAppend(s,'\\');
|
|
bbAppend(s,'u');
|
|
bbAppend(s,hexdigits[hex4]);
|
|
bbAppend(s,hexdigits[hex3]);
|
|
bbAppend(s,hexdigits[hex2]);
|
|
bbAppend(s,hexdigits[hex1]);
|
|
}
|
|
}
|
|
|
|
/* Return a pool string that is s0 with all characters
|
|
ecaped that require it. The resulting string is not
|
|
surrounded by quotes.
|
|
Since the string might actually contain nulls, specify the length.
|
|
*/
|
|
|
|
char*
|
|
jescapify(char* s0, int quote, size_t len)
|
|
{
|
|
int i;
|
|
char* result;
|
|
UTF8* s8;
|
|
UTF16* s16; /* for storing the utf16 string */
|
|
UTF16* tmp16; /* for storing the utf16 string */
|
|
ConversionResult status;
|
|
Bytebuffer* escaped = bbNew();
|
|
size_t len16;
|
|
|
|
s16 = emalloc((1+len)*sizeof(UTF16));
|
|
s8 = (UTF8*)s0;
|
|
tmp16 = s16;
|
|
status = ConvertUTF8toUTF16((const UTF8**)&s8,s8+len,&tmp16,tmp16+len,lenientConversion);
|
|
if(status != conversionOK) {
|
|
derror("Cannot convert UTF8 string to UTF16: %s",s0);
|
|
return NULL;
|
|
}
|
|
/* Get the length of the utf16 string */
|
|
len16 = (tmp16 - s16);
|
|
for(i=0;i<len16;i++) {
|
|
jescapifychar(s16[i],quote,escaped);
|
|
}
|
|
efree(s16);
|
|
result = pooldup(bbContents(escaped));
|
|
bbFree(escaped);
|
|
return result;
|
|
}
|
|
|
|
char*
|
|
jescapifyname(char* s0)
|
|
{
|
|
return jescapify(s0,'"',strlen(s0));
|
|
}
|
|
|
|
/*
|
|
Convert a java name that might possibly
|
|
contain utf8 characters to one that is
|
|
acceptable to the Java compiler.
|
|
Basically this means convert the printables
|
|
using decodify (above) equivalent and then escape
|
|
all the utf chars.
|
|
*/
|
|
char*
|
|
jdecodify (const char *name)
|
|
{
|
|
return decodify(name);
|
|
}
|
|
|
|
/* FORTRAN does escapes differently than e.g. C */
|
|
|
|
char*
|
|
f77escapifychar(unsigned int c, char* s0)
|
|
{
|
|
char* s = s0;
|
|
s0[0] = '\0';
|
|
if(c == '\'') {
|
|
*s++ = '\''; *s++='\'';
|
|
} else if(c >= ' ' && c < '\177') {
|
|
*s++ = (char)c;
|
|
} else {
|
|
char tmp[32];
|
|
nprintf(tmp,sizeof(tmp),"//char(%u)",c);
|
|
strcat(s,tmp);
|
|
s += strlen(tmp);
|
|
}
|
|
*s = '\0';
|
|
return s0;
|
|
}
|
|
|
|
void
|
|
f77quotestring(Bytebuffer* databuf)
|
|
{
|
|
int i;
|
|
int lastcharescaped;
|
|
unsigned int slen = bbLength(databuf);
|
|
unsigned char* s;
|
|
|
|
/* Handle the empty string case */
|
|
if(slen == 0) {
|
|
bbCat(databuf,"char(0)");
|
|
return;
|
|
}
|
|
|
|
s = (unsigned char*)emalloc(slen+1);
|
|
memcpy((void*)s,bbContents(databuf),slen);
|
|
s[slen] = '\0';
|
|
bbClear(databuf);
|
|
|
|
lastcharescaped = 0;
|
|
for(i=0;i<slen;i++) {
|
|
char tmp[32];
|
|
unsigned int c = s[i];
|
|
int thischarescaped = (c < ' ' || c >= '\177');
|
|
if(i > 0) {
|
|
if(!lastcharescaped && thischarescaped) bbAppend(databuf,'\'');
|
|
else if(lastcharescaped && !thischarescaped) bbCat(databuf,"//'");
|
|
} else if(!thischarescaped)
|
|
bbAppend(databuf,'\'');
|
|
f77escapifychar(c,tmp);
|
|
if(i == 0 && thischarescaped)
|
|
bbCat(databuf,tmp+2);
|
|
else
|
|
bbCat(databuf,tmp);
|
|
lastcharescaped = thischarescaped;
|
|
}
|
|
if(!lastcharescaped) bbAppend(databuf,'\'');
|
|
}
|
|
|
|
char*
|
|
f77escapifyname(char* s0)
|
|
{
|
|
Bytebuffer* buf = bbNew();
|
|
char* name;
|
|
bbCat(buf,s0);
|
|
f77quotestring(buf);
|
|
name = bbDup(buf);
|
|
bbFree(buf);
|
|
return name;
|
|
}
|