overwrote

2025-01-30 16:10:44 +08:00 · 2013-09-23 14:19:40 -06:00 · 2013-09-23 14:19:40 -06:00 · 1aa2f695a0
commit 1aa2f695a0
parent a4d3f8306c
5 changed files with 187 additions and 104 deletions
--- a/ncgen/cdata.c
+++ b/ncgen/cdata.c
@ -38,7 +38,7 @@ c_constant(Generator* generator, NCConstant* con, Bytebuffer* buf,...)
 	if(con->value.charv == '\'') 
 	    bbprintf(codetmp,"'\\''");
 	else
-	    bbprintf(codetmp,"'%c'",con->value.charv);
+	    /* Emit the character in its escaped form (cescapifychar), so that
+	       e.g. a tab is written as \t inside the character literal. */
+	    bbprintf(codetmp,"'%s'",cescapifychar(con->value.charv,'\''));
 	break;
    case NC_BYTE:
 	bbprintf(codetmp,"%hhd",con->value.int8v);
--- a/ncgen/cvt.c
+++ b/ncgen/cvt.c
@ -429,8 +429,13 @@ case CASE(NC_STRING,NC_CHAR):
     tmp.charv = src->value.stringv.stringv[0];
     break;
 case CASE(NC_STRING,NC_STRING):
-    tmp.stringv.stringv = nulldup(src->value.stringv.stringv);
+    /* Need to watch out for embedded NULs: copy by explicit length
+       instead of relying on a NUL-terminated string copy. */
    tmp.stringv.len = src->value.stringv.len;
+    /* NOTE(review): the malloc result is not checked here. */
+    tmp.stringv.stringv = (char*)malloc(src->value.stringv.len+1);
+    memcpy((void*)tmp.stringv.stringv,
+           (void*)src->value.stringv.stringv,
+           tmp.stringv.len);
+    /* Terminate the copy; the extra byte was allocated above. */
+    tmp.stringv.stringv[tmp.stringv.len] = '\0';
    break;

 /* What is the proper conversion for T->STRING?*/
--- a/ncgen/escapes.c
+++ b/ncgen/escapes.c
@ -7,12 +7,16 @@
 #include "includes.h"
 #include "ConvertUTF.h"

+#define HEXCHARS "0123456789abcdefABCDEF"
+#define OCTCHARS "01234567"
+
 /* Forward*/
 static void initcodify(void);
 static char* ccodify(const char*);
 static char* f77codify(const char*);
 static char* jcodify(const char*);

+#if 0
 /*
 * Replace escaped chars in CDL representation of name such as
 * 'abc\:def\ gh\\i' with unescaped version, such as 'abc:def gh\i'.
@ -52,13 +56,15 @@ deescapify(char* name)
    efree(newname);
    return;
 }
+#endif /*0*/

 /*
 Given a character c, fill s with the character suitably escaped.
 E.g. c = '\t' => s="\t"
-Caller must ensure enough space
-Currently does not handle unicode
-Returns s as it result.
+Caller must ensure enough space.
+Watch out for embedded NULs.
+Currently passes unicode thru unchanged.
+Returns s as its result.
 */

 char*
@ -71,6 +77,8 @@ escapifychar(unsigned int c, char* s0, int quote)
 	*s++ = '\\'; *s++=(char)quote;
    } else if(c >= ' ' && c != '\177') {
 	*s++ = (char)c;
+    } else if((c & 0x80) != 0) {/* Unicode */
+	*s++ = (char)c;
    } else {
        switch (c) {
 	case '\b': strcpy(s,"\\b"); s+=2; break;
@ -280,6 +288,14 @@ ccodify(const char *name0)
    return pooldup(name);
 }

+/*
+ * Return the escaped form of character c (as produced by escapifychar
+ * with the given quote character) in a buffer obtained from poolalloc.
+ */
+char*
+cescapifychar(unsigned int c, int quote)
+{
+    char* escaped;
+
+    escaped = poolalloc(4+1);
+    (void)escapifychar(c,escaped,quote);
+    return escaped;
+}
+
 /**************************************************/
 /* CML String Escapes */
 /**************************************************/
@ -573,3 +589,143 @@ _DOT__
    }
    return newname;
 }
+
+/**************************************************/
+
+/*
+ * Given a pointer to the hex digits of an escape (the "dd" that
+ * follows "\x"), return the corresponding byte value (0..255).
+ * One or two hex digits are accepted, in either case; the single
+ * digit form is what the lexer passes for constants such as '\x5'.
+ * Returns -1 if s does not start with a hex digit; a NUL is never
+ * treated as a digit.
+ */
+
+/* Value of hex digit c (0..15), or -1 if c is not a hex digit. */
+static int
+hexdigitvalue(int c)
+{
+    if(c >= '0' && c <= '9') return (c - '0');
+    if(c >= 'a' && c <= 'f') return (c - 'a') + 10;
+    if(c >= 'A' && c <= 'F') return (c - 'A') + 10;
+    return -1;
+}
+
+int
+unescapehex(const char* s)
+{
+    int hi = hexdigitvalue(s[0]);
+    int lo;
+    if(hi < 0)
+	return -1; /* also avoids reading s[1] when s[0] is the NUL */
+    lo = hexdigitvalue(s[1]);
+    if(lo < 0)
+	return hi; /* single digit form, e.g. "5'" => 0x05 */
+    return (hi << 4) | lo;
+}
+
+/*
+ * Given a pointer to the octal digits of an escape (the "ddd" that
+ * follows the backslash), return the corresponding byte value (0..255).
+ * One to three octal digits are accepted; the short forms are what the
+ * lexer passes for constants such as '\0'.
+ * Returns -1 if s does not start with an octal digit or if the value
+ * does not fit in a byte (i.e. is greater than octal 377).
+ */
+
+int
+unescapeoct(const char* s)
+{
+    int b = 0;
+    int n;
+    /* The digit test fails on NUL, so the scan never reads past the
+       end of a shorter string. */
+    for(n = 0; n < 3 && s[n] >= '0' && s[n] <= '7'; n++)
+	b = (b << 3) | (s[n] - '0');
+    if(n == 0 || b > 0377)
+	return -1;
+    return b;
+}
+
+/*
+ * "Un-escapes" valid escape sequences in yytext (read by lex) into the
+ * appropriate unescaped characters.  For example, the two character
+ * sequence "\t" in yytext is converted into a single tab character.
+ * If yytext both starts and ends with a double quote, the two quotes
+ * are dropped; otherwise the text is processed as is (the {ID} rule
+ * passes identifiers, which carry no quotes).
+ * Processing stops at yyleng characters or at the first NUL in yytext.
+ * Hex escapes take one or two digits, octal escapes one to three; an
+ * escape that cannot be decoded is kept as the character that follows
+ * the backslash, like any unknown escape.
+ * On return, s is nul terminated.
+ * Watch out for embedded nuls (the output may contain them, e.g. from
+ * "\000") and utf-8 characters (copied through unchanged).
+ * Return # of characters written, not counting the terminating nul.
+ */
+
+/* True if c is a hex digit; never true for NUL. */
+static int
+unescape_ishex(int c)
+{
+    return (c >= '0' && c <= '9')
+           || (c >= 'a' && c <= 'f')
+           || (c >= 'A' && c <= 'F');
+}
+
+int
+unescape(
+     char *s, /* fill with contents of yytext, with escapes removed.
+                 s and yytext may be same*/
+     const char *yytext,
+     int yyleng)
+{
+    const char *t, *tend;
+    char *p;
+    char digits[4];
+    int b, n, i;
+
+    if(yyleng < 0)
+	yyleng = 0;
+    /* ignore leading and trailing quotes, when present */
+    if(yyleng >= 2 && yytext[0] == '"' && yytext[yyleng-1] == '"') {
+	yytext++;
+	yyleng -= 2;
+    }
+    t = yytext;
+    tend = t + yyleng;
+    p = s;
+    /* p never gets ahead of t, which is what allows s == yytext. */
+    while(t < tend && *t) {
+	if(*t != '\\' || t + 1 >= tend) {
+	    /* plain character, or a backslash with nothing after it */
+	    *p++ = *t++;
+	    continue;
+	}
+	t++; /* skip the backslash; *t now selects the escape */
+	switch (*t) {
+	  case 'a': *p++ = '\007'; t++; break; /* will use '\a' when STDC */
+	  case 'b': *p++ = '\b'; t++; break;
+	  case 'f': *p++ = '\f'; t++; break;
+	  case 'n': *p++ = '\n'; t++; break;
+	  case 'r': *p++ = '\r'; t++; break;
+	  case 't': *p++ = '\t'; t++; break;
+	  case 'v': *p++ = '\v'; t++; break;
+	  case '?': *p++ = '\177'; t++; break; /* "\?" yields DEL here */
+	  case 'x':
+	    /* count the (at most two) hex digits that follow the 'x' */
+	    for(n = 0; n < 2 && t + 1 + n < tend && unescape_ishex(t[1+n]); n++)
+		;
+	    b = -1;
+	    if(n > 0) {
+		/* left-pad with '0' to a two digit string */
+		digits[0] = (n == 2 ? t[1] : '0');
+		digits[1] = t[n];
+		digits[2] = '\0';
+		b = unescapehex(digits);
+	    }
+	    if(b < 0) {
+		*p++ = *t++; /* not a usable hex escape: keep the 'x' */
+	    } else {
+		*p++ = (char)b;
+		t += 1 + n;
+	    }
+	    break;
+	  case '0': case '1': case '2': case '3':
+	  case '4': case '5': case '6': case '7':
+	    /* t points to the first of one to three octal digits */
+	    for(n = 1; n < 3 && t + n < tend && t[n] >= '0' && t[n] <= '7'; n++)
+		;
+	    /* left-pad with '0' to a three digit string */
+	    digits[0] = digits[1] = digits[2] = '0';
+	    digits[3] = '\0';
+	    for(i = 0; i < n; i++)
+		digits[3 - n + i] = t[i];
+	    b = unescapeoct(digits);
+	    if(b < 0) {
+		*p++ = *t++; /* digits were rejected: keep the first one */
+	    } else {
+		*p++ = (char)b;
+		t += n;
+	    }
+	    break;
+	  default:
+	    /* "\\", "\'", "\"" and unknown escapes: keep the character */
+	    *p++ = *t++;
+	    break;
+	}
+    }
+    *p = '\0';
+    return (int)(p - s);
+}
--- a/ncgen/genchar.c
+++ b/ncgen/genchar.c
@ -176,7 +176,8 @@ gen_charconstant(NCConstant* con, Bytebuffer* databuf, int fillchar)
        break;
    case NC_STRING:
 	constsize = con->value.stringv.len;
-        bbCat(databuf,con->value.stringv.stringv);
+        bbAppendn(databuf,con->value.stringv.stringv,
+                         con->value.stringv.len);
        bbNull(databuf);
        break;
    case NC_FILL:
--- a/ncgen/ncgen.l
+++ b/ncgen/ncgen.l
@ -66,7 +66,6 @@ unsigned char ubyte_val;       /* last byte value read */
 static Symbol* makepath(char* text);
 static int lexdebug(int);
 static int parseLL(char* text);
-static void expand_escapes(Bytebuffer*, char*, int);

 static struct Specialtoken {
    char* name;
@ -174,7 +173,7 @@ USASCII   [\x01-\x7F]
                          break;
                        }

-\"{nonquotes}\"		{
+\"{nonquotes}\"		{int len;
 			 /* In netcdf4, this will be used in a variety
                            of places, so only remove escapes */
 /*
@ -183,9 +182,11 @@ yyerror("string too long, truncated\n");
 yytext[MAXTRST-1] = '\0';
 }
 */
-		        /* Assumes expand escapes also does normalization */
-			bbClear(lextext);
-			expand_escapes(lextext,(char *)yytext,yyleng);
+		        /* FIX: Assumes unescape also does normalization */
+			bbSetLength(lextext,yyleng);
+			len = unescape(bbContents(lextext),
+                                       (char *)yytext,yyleng);
+			bbSetLength(lextext,len);
 			bbNull(lextext);
 		 	return lexdebug(TERMSTRING);
 		        }
@ -303,12 +304,13 @@ NIL|nil|Nil {
 		    return lexdebug(DATASETID);
 		}

-{ID}		{ char* id;
+{ID}		{ char* id; int len;
 		    bbClear(lextext);
 		    bbAppendn(lextext,(char*)yytext,yyleng+1); /* include null */
 		    bbNull(lextext);
 		    id = bbContents(lextext);
-		    deescapify(id);
+		    len = unescape(id,id,bbLength(lextext));
+		    bbSetLength(lextext,len);
 		    if (STREQ(id, FILL_STRING)) return lexdebug(FILLMARKER);
 		    yylval.sym = install(id);
 		    return lexdebug(IDENT);
@ -507,11 +509,19 @@ NIL|nil|Nil {
 		return lexdebug(BYTE_CONST);
                }
 \'\\[0-7][0-7]?[0-7]?\'  {
-		byte_val = (char) strtol((char*)&yytext[2], (char **) 0, 8);
+		byte_val = unescapeoct(&yytext[2]);
+		if(byte_val < 0) {
+		    sprintf(errstr,"bad octal character constant: %s",(char*)yytext);
+		    yyerror(errstr);
+		}		    		    
 		return lexdebug(BYTE_CONST);
                }
 \'\\[xX][0-9a-fA-F][0-9a-fA-F]?\'  {
-		byte_val = (char) strtol((char*)&yytext[3], (char **) 0, 16);
+		byte_val = unescapehex(&yytext[3]);
+		if(byte_val < 0) {
+		    sprintf(errstr,"bad hex character constant: %s",(char*)yytext);
+		    yyerror(errstr);
+		}		    		    
 		return lexdebug(BYTE_CONST);
                }
 \'\\.\'        {
@ -678,93 +688,4 @@ parseLL(char* text)
    return result;
 }

-/*
- * "Expands" valid escape sequences in yystring (read by lex) into the
- * apropriate characters in termstring.  For example, the two character
- * sequence "\t" in yystring would be converted into a single tab character
- * in termstring.  On return, termstring is properly terminated.
- */
-
-static void
-expand_escapes(
-     Bytebuffer *s, /* fill with contents of yytext, with escapes expanded */
-     char *yytext,
-     int yyleng)
-{
-    char *t, *endp, *tend;
-    /* ignore leading and trailing quotes */
-    if(yytext[0] != '"' || yytext[yyleng-1] != '"')
-	abort();
-    yytext++;
-    yyleng--; /* leading quote */
-    yyleng--; /* trailing quote */
-    /* expand "\" escapes, e.g. "\t" to tab character  */
-    t = yytext;
-    tend = t + yyleng;
-    while(*t && t < tend) {
-	if (*t == '\\') {
-	    t++;
-	    switch (*t) {
-	      case 'a':
-		bbAppend(s,'\007'); t++; /* will use '\a' when STDC */
-		break;
-	      case 'b':
-		bbAppend(s,'\b'); t++;
-		break;
-	      case 'f':
-		bbAppend(s,'\f'); t++;
-		break;
-	      case 'n':
-		bbAppend(s,'\n'); t++;
-		break;
-	      case 'r':
-		bbAppend(s,'\r'); t++;
-		break;
-	      case 't':
-		bbAppend(s,'\t'); t++;
-		break;
-	      case 'v':
-		bbAppend(s,'\v'); t++;
-		break;
-	      case '\\':
-		bbAppend(s,'\\'); t++;
-		break;
-	      case '?':
-		bbAppend(s,'\177'); t++;
-		break;
-	      case '\'':
-		bbAppend(s,'\''); t++;
-		break;
-	      case '\"':
-		bbAppend(s,'\"'); t++;
-		break;
-	      case 'x':
-		t++; /* now t points to one or more hex digits */
-		bbAppend(s,(char) strtol(t, &endp, 16));
-		t = endp;
-		break;
-	      case '0':
-	      case '1':
-	      case '2':
-	      case '3':
-	      case '4':
-	      case '5':
-	      case '6':
-	      case '7':
-		/* t now points to octal digits */
-		bbAppend(s,(char) strtol(t, &endp, 8));
-		t = endp;
-		break;
-	      default:
-		bbAppend(s,*t); t++;
-		break;
-	    }
-	} else {
-	    bbAppend(s,*t); t++;
-	}
-    }
-    bbNull(s);
-    bbSetlength(s,strlen(bbContents(s)));
-    return;
-}