qstring: add nasm_unquote() supporting `...` strings

Add a nasm_unquote() function supporting the intended `...` syntax.
This commit is contained in:
H. Peter Anvin 2008-06-01 16:07:48 -07:00
parent 311d27d328
commit 7f2f8b35e6

205
nasmlib.c
View File

@ -950,6 +950,211 @@ void nasm_quote(char **str)
*str = p;
}
/*
 * Store the code point v at q using the original (up to six byte)
 * UTF-8 scheme and return a pointer just past the last byte written.
 * A negative v cannot be encoded; nothing is written and q is
 * returned unchanged.
 */
static char *emit_utf8(char *q, int32_t v)
{
    int trail;                  /* number of 10xxxxxx continuation bytes */
    int lead;                   /* marker bits of the leading byte */

    if (v < 0)
        return q;               /* Impossible - do nothing */

    if (v <= 0x7f) {
        /* Plain ASCII is stored as a single byte */
        *q++ = v;
        return q;
    }

    if (v <= 0x000007ff) {
        trail = 1;
        lead = 0xc0;
    } else if (v <= 0x0000ffff) {
        trail = 2;
        lead = 0xe0;
    } else if (v <= 0x001fffff) {
        trail = 3;
        lead = 0xf0;
    } else if (v <= 0x03ffffff) {
        trail = 4;
        lead = 0xf8;
    } else {
        trail = 5;
        lead = 0xfc;
    }

    /* Leading byte carries the most significant bits ... */
    *q++ = lead | (v >> (6 * trail));

    /* ... and every continuation byte carries the next six bits */
    while (trail-- > 0)
        *q++ = 0x80 | ((v >> (6 * trail)) & 63);

    return q;
}
/*
 * Do an *in-place* dequoting of the specified string, returning the
 * resulting length (the result may contain embedded NULs).
 *
 * Three kinds of input are distinguished:
 *
 *   '...' or "..."  The surrounding quotes are stripped and the
 *                   contents are kept verbatim (no escape processing).
 *
 *   `...`           The surrounding backquotes are stripped and
 *                   backslash escapes are expanded:
 *                     \a \b \e \f \n \r \t \v  control characters
 *                     \o \oo \ooo              one to three octal digits
 *                     \xh \xhh (or \X)         one or two hex digits
 *                     \uhhhh                   up to 4 hex digits, as UTF-8
 *                     \Uhhhhhhhh               up to 8 hex digits, as UTF-8
 *                     \<anything else>         that character itself
 *                   A \x, \u or \U not followed by any hex digit yields
 *                   the escape letter itself.
 *
 *   anything else   The string is left untouched and its strlen() is
 *                   returned.
 *
 * Whenever the string is modified, a NUL terminator is stored after
 * the result.
 *
 * In-place replacement is possible since the unquoted length is always
 * shorter than or equal to the quoted length.
 */
size_t nasm_unquote(char *str)
{
    size_t ln;
    char bq, eq;
    char *p, *q, *ep;
    char *escp = NULL;          /* Escape letter following the backslash */
    char c;
    enum unq_state {
        st_start,
        st_backslash,
        st_hex,
        st_oct,
        st_ucs,
    } state;
    int ndig = 0;
    int32_t nval = 0;

    bq = str[0];
    if (!bq)
        return 0;

    ln = strlen(str);
    eq = str[ln-1];

    /*
     * A string consisting of a lone quote character has bq == eq but
     * nothing to strip; requiring ln >= 2 keeps ln-2 from wrapping.
     */
    if ((bq == '\'' || bq == '\"') && bq == eq && ln >= 2) {
        /* '...' or "..." string */
        memmove(str, str+1, ln-2);
        str[ln-2] = '\0';
        return ln-2;
    }

    /*
     * NOTE(review): a backquote at *either* end selects this path, so
     * the first and last characters are dropped even when only one of
     * them is a backquote.  Kept as-is; confirm whether "&&" was meant.
     */
    if (bq == '`' || eq == '`') {
        /* `...` string */
        q = str;
        p = str+1;
        ep = str+ln-1;
        state = st_start;

        while (p < ep) {
            c = *p++;
            switch (state) {
            case st_start:
                if (c == '\\')
                    state = st_backslash;
                else
                    *q++ = c;
                break;

            case st_backslash:
                state = st_start;
                escp = p-1;
                switch (c) {
                case 'a':
                    *q++ = 7;
                    break;
                case 'b':
                    *q++ = 8;
                    break;
                case 'e':
                    *q++ = 27;
                    break;
                case 'f':
                    *q++ = 12;
                    break;
                case 'n':
                    *q++ = 10;
                    break;
                case 'r':
                    *q++ = 13;
                    break;
                case 't':
                    *q++ = 9;
                    break;
                case 'u':
                    state = st_ucs;
                    ndig = 4;
                    nval = 0;
                    break;
                case 'U':
                    state = st_ucs;
                    ndig = 8;
                    nval = 0;
                    break;
                case 'v':
                    *q++ = 11;
                    break;      /* must not fall into the \x handling */
                case 'x':
                case 'X':
                    state = st_hex;
                    ndig = nval = 0;
                    break;
                case '0':
                case '1':
                case '2':
                case '3':
                case '4':
                case '5':
                case '6':
                case '7':
                    state = st_oct;
                    ndig = 1;
                    nval = c - '0';
                    break;
                default:
                    *q++ = c;
                    break;
                }
                break;

            case st_oct:
                if (c >= '0' && c <= '7') {
                    nval = (nval << 3) + (c - '0');
                    if (++ndig >= 3) {
                        *q++ = nval;
                        state = st_start;
                    }
                } else {
                    p--;        /* Process this character again */
                    *q++ = nval;
                    state = st_start;
                }
                break;

            case st_hex:
                if ((c >= '0' && c <= '9') ||
                    (c >= 'A' && c <= 'F') ||
                    (c >= 'a' && c <= 'f')) {
                    nval = (nval << 4) + numvalue(c);
                    if (++ndig >= 2) {
                        *q++ = nval;
                        state = st_start;
                    }
                } else {
                    p--;        /* Process this character again */
                    *q++ = ndig ? nval : *escp;
                    state = st_start;
                }
                break;

            case st_ucs:
                if ((c >= '0' && c <= '9') ||
                    (c >= 'A' && c <= 'F') ||
                    (c >= 'a' && c <= 'f')) {
                    /*
                     * Shift as unsigned: the eighth digit of \U can
                     * reach the sign bit, and overflowing a signed
                     * shift is undefined behaviour.
                     */
                    nval = (int32_t)(((uint32_t)nval << 4) + numvalue(c));
                    if (!--ndig) {
                        q = emit_utf8(q, nval);
                        state = st_start;
                    }
                } else {
                    p--;        /* Process this character again */
                    if (p > escp+1)
                        q = emit_utf8(q, nval);
                    else
                        *q++ = *escp;
                    state = st_start;
                }
                break;
            }
        }

        /*
         * The closing quote may arrive while a numeric escape is still
         * collecting digits; emit what has been gathered so far, just
         * as a non-digit character would have done inside the loop.
         */
        switch (state) {
        case st_start:
        case st_backslash:
            /* Nothing pending (a trailing lone backslash is dropped) */
            break;
        case st_oct:
            *q++ = nval;
            break;
        case st_hex:
            *q++ = ndig ? nval : *escp;
            break;
        case st_ucs:
            if (p > escp+1)
                q = emit_utf8(q, nval);
            else
                *q++ = *escp;
            break;
        }

        *q = '\0';
        return q-str;
    }

    /* Otherwise, just return the input... */
    return ln;
}
char *nasm_strcat(char *one, char *two)
{
char *rslt;