overwrote

This commit is contained in:
dmh 2013-09-23 14:19:40 -06:00
parent a4d3f8306c
commit 1aa2f695a0
5 changed files with 187 additions and 104 deletions

View File

@ -38,7 +38,7 @@ c_constant(Generator* generator, NCConstant* con, Bytebuffer* buf,...)
if(con->value.charv == '\'')
bbprintf(codetmp,"'\\''");
else
bbprintf(codetmp,"'%c'",con->value.charv);
bbprintf(codetmp,"'%s'",cescapifychar(con->value.charv,'\'',);
break;
case NC_BYTE:
bbprintf(codetmp,"%hhd",con->value.int8v);

View File

@ -429,8 +429,13 @@ case CASE(NC_STRING,NC_CHAR):
tmp.charv = src->value.stringv.stringv[0];
break;
case CASE(NC_STRING,NC_STRING):
tmp.stringv.stringv = nulldup(src->value.stringv.stringv);
/* Need to watch out for embedded NULs */
tmp.stringv.len = src->value.stringv.len;
tmp.stringv.stringv = (char*)malloc(src->value.stringv.len+1);
memcpy((void*)tmp.stringv.stringv,
(void*)src->value.stringv.stringv,
tmp.stringv.len);
tmp.stringv[tmp.stringv.len] = '\0';
break;
/* What is the proper conversion for T->STRING?*/

View File

@ -7,12 +7,16 @@
#include "includes.h"
#include "ConvertUTF.h"
#define HEXCHARS "0123456789abcdefABCDEF"
#define OCTCHARS "01234567"
/* Forward*/
static void initcodify(void);
static char* ccodify(const char*);
static char* f77codify(const char*);
static char* jcodify(const char*);
#if 0
/*
* Replace escaped chars in CDL representation of name such as
* 'abc\:def\ gh\\i' with unescaped version, such as 'abc:def gh\i'.
@ -52,13 +56,15 @@ deescapify(char* name)
efree(newname);
return;
}
#endif /*0*/
/*
Given a character c, fill s with the character suitably escaped.
E.g. c = '\t' => s="\t"
Caller must ensure enough space
Currently does not handle unicode
Returns s as it result.
Caller must ensure enough space.
Watch out for embedded NULs.
Currently passes unicode thru unchanged.
Returns s as its result.
*/
char*
@ -71,6 +77,8 @@ escapifychar(unsigned int c, char* s0, int quote)
*s++ = '\\'; *s++=(char)quote;
} else if(c >= ' ' && c != '\177') {
*s++ = (char)c;
} else if((c & 0x80) != 0) {/* Unicode */
*s++ = (char)c;
} else {
switch (c) {
case '\b': strcpy(s,"\\b"); s+=2; break;
@ -280,6 +288,14 @@ ccodify(const char *name0)
return pooldup(name);
}
/*
 * Return a freshly obtained buffer (from poolalloc) holding the
 * escaped form of character c, as written by escapifychar() using
 * the given quote character.
 * NOTE(review): the 4+1 size presumably covers the longest escape
 * escapifychar can emit plus the trailing NUL -- confirm against
 * escapifychar.
 */
char*
cescapifychar(unsigned int c, int quote)
{
    enum { ESCAPED_CHAR_SPACE = 4+1 };
    char* escaped;

    escaped = poolalloc(ESCAPED_CHAR_SPACE);
    escapifychar(c,escaped,quote);
    return escaped;
}
/**************************************************/
/* CML String Escapes */
/**************************************************/
@ -573,3 +589,143 @@ _DOT__
}
return newname;
}
/**************************************************/
/*
 * Numeric value (0..15) of the hex digit character c,
 * or -1 if c is not one of 0-9, a-f, A-F (NUL included).
 */
static int
unescapehex_digit(int c)
{
    if(c >= '0' && c <= '9') return c - '0';
    if(c >= 'a' && c <= 'f') return (c - 'a') + 10;
    if(c >= 'A' && c <= 'F') return (c - 'A') + 10;
    return -1;
}

/*
 * Given a pointer to the two hex digits of an escape of the
 * form '\xdd' (i.e. s points at the first 'd', not at the 'x'),
 * return the corresponding byte value in the range 0..255.
 * Returns -1 if s is NULL or if either of the two characters is
 * not a hex digit. Upper and lower case digits are both accepted.
 * s[1] is only examined when s[0] is a valid digit, so a string
 * that ends early is never read past its terminating NUL.
 */
int
unescapehex(const char* s)
{
    int hi;
    int lo;

    if(s == NULL)
        return -1;
    hi = unescapehex_digit((unsigned char)s[0]);
    if(hi < 0)
        return -1;
    lo = unescapehex_digit((unsigned char)s[1]);
    if(lo < 0)
        return -1;
    /* high nibble from the first digit, low nibble from the second */
    return (hi << 4) | lo;
}
/*
 * Given a pointer to a string starting with exactly three octal
 * digits 'ddd' (the part of a '\ddd' escape after the backslash),
 * return the corresponding byte value in the range 0..0377.
 * Returns -1 if s is NULL, if any of the three characters is not
 * an octal digit, or if the value does not fit in a byte
 * (i.e. the first digit is greater than 3).
 * Characters are validated in order, so a string that ends early
 * is never read past its terminating NUL.
 */
int
unescapeoct(const char* s)
{
    int value = 0;
    int i;

    if(s == NULL)
        return -1;
    for(i = 0; i < 3; i++) {
        int c = (unsigned char)s[i];
        if(c < '0' || c > '7')
            return -1;
        value = (value << 3) | (c - '0');
    }
    /* three octal digits can encode up to 0777; only 0..0377 is a byte */
    if(value > 0377)
        return -1;
    return value;
}
/*
 * Numeric value (0..15) of the hex digit character c,
 * or -1 if c is not one of 0-9, a-f, A-F.
 */
static int
unescape_hexvalue(int c)
{
    if(c >= '0' && c <= '9') return c - '0';
    if(c >= 'a' && c <= 'f') return (c - 'a') + 10;
    if(c >= 'A' && c <= 'F') return (c - 'A') + 10;
    return -1;
}

/*
 * "Un-escapes" valid escape sequences in yytext (read by lex) into the
 * appropriate unescaped characters, writing the result to s. For example,
 * the two character sequence "\t" in yytext is converted into a single
 * tab character.
 *
 * Input handling:
 *  - If yytext is wrapped in double quotes (a string token), the leading
 *    and trailing quote are dropped. Unquoted text (e.g. an identifier)
 *    is processed as-is.
 *  - Scanning stops at yyleng characters or at a raw NUL in yytext,
 *    whichever comes first.
 *
 * Escapes recognized after a backslash:
 *  - a b f n r t v     the usual C control characters
 *  - ?                 '\177' (DEL), as in the original ncgen code
 *  - x + 1..2 hex digits; "\x" with no hex digit yields a plain 'x'
 *  - 1..3 octal digits; the value is truncated to a char
 *  - any other character stands for itself (covers \\ \' \" \: etc.)
 * A backslash that is the very last character is copied unchanged.
 *
 * Bytes with the high bit set (UTF-8) are passed through untouched.
 * The output may contain embedded NULs (e.g. from "\0"), so callers must
 * use the returned length rather than strlen().
 *
 * s and yytext may be the same buffer: the output is never longer than
 * the input, and each character is read before its slot is overwritten.
 * s must have room for the unescaped text plus a terminating NUL.
 *
 * Returns the number of characters written, not counting the NUL
 * terminator; returns 0 if s is NULL, yytext is NULL, or yyleng <= 0.
 */
int
unescape(
     char *s, /* fill with contents of yytext, with escapes removed.
                 s and yytext may be same*/
     const char *yytext,
     int yyleng)
{
    const char *t;
    const char *tend;
    char *p = s;

    if(s == NULL)
        return 0;
    if(yytext == NULL || yyleng <= 0) {
        *s = '\0';
        return 0;
    }

    t = yytext;
    tend = yytext + yyleng;
    /* ignore leading and trailing quotes when both are present */
    if(yyleng >= 2 && yytext[0] == '"' && yytext[yyleng-1] == '"') {
        t++;
        tend--;
    }

    /* bound check first so *t is never read at or beyond tend */
    while(t < tend && *t != '\0') {
        int c = (unsigned char)*t++;

        if(c != '\\' || t >= tend || *t == '\0') {
            /* ordinary character, or a trailing backslash */
            *p++ = (char)c;
            continue;
        }

        c = (unsigned char)*t++; /* the character after the backslash */
        switch (c) {
        case 'a': c = '\007'; break;
        case 'b': c = '\b'; break;
        case 'f': c = '\f'; break;
        case 'n': c = '\n'; break;
        case 'r': c = '\r'; break;
        case 't': c = '\t'; break;
        case 'v': c = '\v'; break;
        case '?': c = '\177'; break;
        case 'x': {
            /* t now points just past the 'x': take up to two hex digits */
            int ndigits = 0;
            int value = 0;
            while(ndigits < 2 && t < tend) {
                int d = unescape_hexvalue((unsigned char)*t);
                if(d < 0)
                    break;
                value = (value << 4) | d;
                t++;
                ndigits++;
            }
            if(ndigits > 0)
                c = value; /* otherwise keep the literal 'x' */
            } break;
        case '0': case '1': case '2': case '3':
        case '4': case '5': case '6': case '7': {
            /* c is the first octal digit: take up to two more */
            int ndigits = 1;
            int value = c - '0';
            while(ndigits < 3 && t < tend && *t >= '0' && *t <= '7') {
                value = (value << 3) | (*t - '0');
                t++;
                ndigits++;
            }
            c = value;
            } break;
        default:
            /* \\ \' \" and anything else: the character itself */
            break;
        }
        *p++ = (char)c;
    }
    *p = '\0';
    return (int)(p - s);
}

View File

@ -176,7 +176,8 @@ gen_charconstant(NCConstant* con, Bytebuffer* databuf, int fillchar)
break;
case NC_STRING:
constsize = con->value.stringv.len;
bbCat(databuf,con->value.stringv.stringv);
bbAppendn(databuf,con->value.stringv.stringv,
con->value.stringv.len);
bbNull(databuf);
break;
case NC_FILL:

View File

@ -66,7 +66,6 @@ unsigned char ubyte_val; /* last byte value read */
static Symbol* makepath(char* text);
static int lexdebug(int);
static int parseLL(char* text);
static void expand_escapes(Bytebuffer*, char*, int);
static struct Specialtoken {
char* name;
@ -174,7 +173,7 @@ USASCII [\x01-\x7F]
break;
}
\"{nonquotes}\" {
\"{nonquotes}\" {int len;
/* In netcdf4, this will be used in a variety
of places, so only remove escapes */
/*
@ -183,9 +182,11 @@ yyerror("string too long, truncated\n");
yytext[MAXTRST-1] = '\0';
}
*/
/* Assumes expand escapes also does normalization */
bbClear(lextext);
expand_escapes(lextext,(char *)yytext,yyleng);
/* FIX: Assumes unescape also does normalization */
bbSetLength(lextext,yyleng);
len = unescape(bbContents(lextext),
(char *)yytext,yyleng);
bbSetLength(lextext,len);
bbNull(lextext);
return lexdebug(TERMSTRING);
}
@ -303,12 +304,13 @@ NIL|nil|Nil {
return lexdebug(DATASETID);
}
{ID} { char* id;
{ID} { char* id; int len;
bbClear(lextext);
bbAppendn(lextext,(char*)yytext,yyleng+1); /* include null */
bbNull(lextext);
id = bbContents(lextext);
deescapify(id);
len = unescape(id,id,bbLength(lextext));
bbSetLength(lextext,len);
if (STREQ(id, FILL_STRING)) return lexdebug(FILLMARKER);
yylval.sym = install(id);
return lexdebug(IDENT);
@ -507,11 +509,19 @@ NIL|nil|Nil {
return lexdebug(BYTE_CONST);
}
\'\\[0-7][0-7]?[0-7]?\' {
byte_val = (char) strtol((char*)&yytext[2], (char **) 0, 8);
byte_val = unescapeoct(&yytext[2]);
if(byte_val < 0) {
sprintf(errstr,"bad octal character constant: %s",(char*)yytext);
yyerror(errstr);
}
return lexdebug(BYTE_CONST);
}
\'\\[xX][0-9a-fA-F][0-9a-fA-F]?\' {
byte_val = (char) strtol((char*)&yytext[3], (char **) 0, 16);
byte_val = unescapehex(&yytext[3]);
if(byte_val < 0) {
sprintf(errstr,"bad hex character constant: %s",(char*)yytext);
yyerror(errstr);
}
return lexdebug(BYTE_CONST);
}
\'\\.\' {
@ -678,93 +688,4 @@ parseLL(char* text)
return result;
}
/*
 * Copy the double-quoted token yytext (as read by lex) into the byte
 * buffer s, replacing each backslash escape sequence by the character
 * it denotes; e.g. the two characters "\t" become one tab character.
 * The enclosing quotes are not copied. The function aborts if yytext is
 * not wrapped in double quotes. On return the buffer is terminated and
 * its length is reset to the strlen() of its contents.
 */
static void
expand_escapes(
     Bytebuffer *s, /* fill with contents of yytext, with escapes expanded */
     char *yytext,
     int yyleng)
{
    char *cursor, *limit, *numend;

    /* the token must start and end with a double quote */
    if(yytext[0] != '"' || yytext[yyleng-1] != '"')
        abort();

    /* skip the leading quote and stop before the trailing one */
    cursor = yytext + 1;
    limit = cursor + (yyleng - 2);

    for(;*cursor && cursor < limit;) {
        char ch;

        if (*cursor != '\\') {
            /* plain character: copy through */
            bbAppend(s,*cursor);
            cursor++;
            continue;
        }

        cursor++; /* step over the backslash */
        switch (*cursor) {
        case 'x':
            cursor++; /* now cursor points to one or more hex digits */
            bbAppend(s,(char) strtol(cursor, &numend, 16));
            cursor = numend;
            continue;
        case '0': case '1': case '2': case '3':
        case '4': case '5': case '6': case '7':
            /* cursor points to octal digits */
            bbAppend(s,(char) strtol(cursor, &numend, 8));
            cursor = numend;
            continue;
        case 'a': ch = '\007'; break; /* will use '\a' when STDC */
        case 'b': ch = '\b'; break;
        case 'f': ch = '\f'; break;
        case 'n': ch = '\n'; break;
        case 'r': ch = '\r'; break;
        case 't': ch = '\t'; break;
        case 'v': ch = '\v'; break;
        case '\\': ch = '\\'; break;
        case '?': ch = '\177'; break;
        case '\'': ch = '\''; break;
        case '\"': ch = '\"'; break;
        default: ch = *cursor; break;
        }
        /* single-character escapes: emit the mapped char, consume one */
        bbAppend(s,ch);
        cursor++;
    }
    bbNull(s);
    bbSetlength(s,strlen(bbContents(s)));
    return;
}