binutils-gdb/binutils/rclex.c
Alan Modra d87bef3a7b Update year range in copyright notice of binutils files
The newer update-copyright.py fixes file encoding too, removing cr/lf
on binutils/bfdtest2.c and ld/testsuite/ld-cygwin/exe-export.exp, and
embedded cr in binutils/testsuite/binutils-all/ar.exp string match.
2023-01-01 21:50:11 +10:30

905 lines
18 KiB
C

/* rclex.c -- lexer for Windows rc files parser */
/* Copyright (C) 1997-2023 Free Software Foundation, Inc.
Written by Kai Tietz, Onevision.
This file is part of GNU Binutils.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
02110-1301, USA. */
/* This is a lexer used by the Windows rc file parser. It basically
just recognizes a bunch of keywords. */
#include "sysdep.h"
#include "bfd.h"
#include "bucomm.h"
#include "libiberty.h"
#include "safe-ctype.h"
#include "windres.h"
#include "rcparse.h"
#include <assert.h>
/* Whether we are in rcdata mode, in which we return the lengths of
strings. Set by rcparse_rcdata and cleared by rcparse_normal. */
static int rcdata_mode;
/* Whether we are suppressing lines from cpp (including windows.h or
headers from your C sources may bring in externs and typedefs).
When active, we return IGNORED_TOKEN, which lets us ignore these
outside of resource constructs. Thus, it isn't required to protect
all the non-preprocessor lines in your header files with #ifdef
RC_INVOKED. It also means your RC file can't include other RC
files if they're named "*.h". Sorry. Name them *.rch or whatever. */
static int suppress_cpp_data;
/* Evaluate to IGNORED_TOKEN while cpp data is being suppressed, and to
X otherwise. */
#define IGNORE_CPP(x) (suppress_cpp_data ? IGNORED_TOKEN : (x))
/* The first filename we detect in the cpp output. We use this to
tell included files from the original file. */
static char *initial_fn;
/* List of allocated strings. Each node owns the buffer S; get_string
pushes new nodes on the front of the list and rcparse_discard_strings
frees the whole list. */
struct alloc_string
{
/* Next node in the list, or NULL at the end. */
struct alloc_string *next;
/* The allocated buffer handed out to the caller of get_string. */
char *s;
};
/* Head of the list of strings allocated so far. */
static struct alloc_string *strings;
/* One entry of the keyword table: the spelling of a keyword and the
token value the lexer returns for it. */
struct rclex_keywords
{
const char *name;
int tok;
};
/* K(FOO) maps the spelling "FOO" to the token FOO. */
#define K(KEY) { #KEY, KEY }
/* KRT(FOO) maps the spelling "FOO" to the token RT_FOO. */
#define KRT(KEY) { #KEY, RT_##KEY }
/* Table of recognized keywords, terminated by an entry whose name is
NULL. "BEGIN" and "VERSION" are listed explicitly because their token
names (BEG and VERSIONK) differ from their spelling. */
static const struct rclex_keywords keywds[] =
{
K(ACCELERATORS), K(ALT), K(ANICURSOR), K(ANIICON), K(ASCII),
K(AUTO3STATE), K(AUTOCHECKBOX), K(AUTORADIOBUTTON),
K(BEDIT), { "BEGIN", BEG }, K(BITMAP), K(BLOCK), K(BUTTON),
K(CAPTION), K(CHARACTERISTICS), K(CHECKBOX), K(CHECKED),
K(CLASS), K(COMBOBOX), K(CONTROL), K(CTEXT), K(CURSOR),
K(DEFPUSHBUTTON), K(DIALOG), K(DIALOGEX), K(DISCARDABLE),
K(DLGINCLUDE), K(DLGINIT),
K(EDITTEXT), K(END), K(EXSTYLE),
K(FILEFLAGS), K(FILEFLAGSMASK), K(FILEOS), K(FILESUBTYPE),
K(FILETYPE), K(FILEVERSION), K(FIXED), K(FONT), K(FONTDIR),
K(GRAYED), KRT(GROUP_CURSOR), KRT(GROUP_ICON), K(GROUPBOX),
K(HEDIT), K(HELP), K(HTML),
K(ICON), K(IEDIT), K(IMPURE), K(INACTIVE),
K(LANGUAGE), K(LISTBOX), K(LOADONCALL), K(LTEXT),
K(MANIFEST), K(MENU), K(MENUBARBREAK), K(MENUBREAK),
K(MENUEX), K(MENUITEM), K(MESSAGETABLE), K(MOVEABLE),
K(NOINVERT), K(NOT), K(OWNERDRAW),
K(PLUGPLAY), K(POPUP), K(PRELOAD), K(PRODUCTVERSION),
K(PURE), K(PUSHBOX), K(PUSHBUTTON),
K(RADIOBUTTON), K(RCDATA), K(RTEXT),
K(SCROLLBAR), K(SEPARATOR), K(SHIFT), K(STATE3),
K(STRINGTABLE), K(STYLE),
K(TOOLBAR),
K(USERBUTTON),
K(VALUE), { "VERSION", VERSIONK }, K(VERSIONINFO),
K(VIRTKEY), K(VXD),
{ NULL, 0 },
};
/* External input stream from resrc */
extern FILE *cpp_pipe;
/* Lexical scanner helpers. */
/* One character of push-back set by rclex_peekch and consumed by
rclex_readch; -1 when no character is pending. */
static int rclex_lastch = -1;
/* Number of characters rclex_tok can hold, not counting the
terminating NUL. */
static size_t rclex_tok_max = 0;
/* Number of characters currently stored in rclex_tok. */
static size_t rclex_tok_pos = 0;
/* NUL-terminated text of the token being scanned; NULL until
rclex_tok_add_char allocates it. */
static char *rclex_tok = NULL;
/* Map the identifier KEY to its keyword token.  Only identifiers that
   start with an upper-case letter are looked up in keywds; anything
   else, including a NULL KEY or an unknown word, yields STRING.  */
static int
rclex_translatekeyword (const char *key)
{
  const struct rclex_keywords *entry;

  if (key == NULL || ! ISUPPER (key[0]))
    return STRING;

  /* The table is terminated by an entry with a NULL name.  */
  for (entry = keywds; entry->name != NULL; ++entry)
    if (strcmp (entry->name, key) == 0)
      return entry->tok;

  return STRING;
}
/* Handle a C preprocessor line.  The text of the line, starting with
   the '#', is taken from rclex_tok.

   Two forms are recognized:

   - "#pragma code_page (DEFAULT | <nr>)" updates wind_current_codepage;
     <nr> may be decimal or 0x-prefixed hexadecimal.  An invalid code
     page is a fatal error.  Other or malformed pragmas are silently
     ignored.

   - "# <line> "<file>"" line markers update rc_lineno and rc_filename,
     remember the first file name seen in initial_fn, and set
     suppress_cpp_data so that tokens coming from included "*.h" files
     are ignored.  */
static void
cpp_line (void)
{
  const char *s = rclex_tok;
  int line;
  char *send, *fn;
  size_t len, mlen;

  /* Skip the '#' and any following whitespace.  */
  ++s;
  while (ISSPACE (*s))
    ++s;

  /* Check for #pragma code_page ( DEFAULT | <nr>).  */
  len = strlen (s);
  mlen = strlen ("pragma");
  if (len > mlen && memcmp (s, "pragma", mlen) == 0 && ISSPACE (s[mlen]))
    {
      const char *end;

      s += mlen + 1;
      while (ISSPACE (*s))
	++s;
      len = strlen (s);
      mlen = strlen ("code_page");
      if (len <= mlen || memcmp (s, "code_page", mlen) != 0)
	/* FIXME: We ought to issue a warning message about an unrecognised pragma.  */
	return;
      s += mlen;
      while (ISSPACE (*s))
	++s;
      if (*s != '(')
	/* FIXME: We ought to issue an error message about a malformed pragma.  */
	return;
      ++s;
      while (ISSPACE (*s))
	++s;
      if (*s == 0 || (end = strchr (s, ')')) == NULL)
	/* FIXME: We ought to issue an error message about a malformed pragma.  */
	return;

      /* Copy the argument between the parentheses and strip trailing
	 blanks and control characters.  */
      len = (size_t) (end - s);
      fn = xmalloc (len + 1);
      if (len)
	memcpy (fn, s, len);
      fn[len] = 0;
      while (len > 0 && (fn[len - 1] > 0 && fn[len - 1] <= 0x20))
	fn[--len] = 0;

      if (! len || (len == strlen ("DEFAULT") && strcasecmp (fn, "DEFAULT") == 0))
	wind_current_codepage = wind_default_codepage;
      else
	{
	  rc_uint_type ncp;

	  if (fn[0] == '0' && (fn[1] == 'x' || fn[1] == 'X'))
	    ncp = (rc_uint_type) strtol (fn + 2, NULL, 16);
	  else
	    ncp = (rc_uint_type) strtol (fn, NULL, 10);
	  if (ncp == CP_UTF16 || ! unicode_is_valid_codepage (ncp))
	    fatal (_("invalid value specified for pragma code_page.\n"));
	  wind_current_codepage = ncp;
	}
      free (fn);
      return;
    }

  /* Otherwise expect a line marker: a line number, then a quoted
     file name.  */
  line = strtol (s, &send, 0);
  if (*send != '\0' && ! ISSPACE (*send))
    return;

  /* Subtract 1 because we are about to count the newline.  */
  rc_lineno = line - 1;

  s = send;
  while (ISSPACE (*s))
    ++s;
  if (*s != '"')
    return;
  ++s;
  send = strchr (s, '"');
  if (send == NULL)
    return;

  /* strchr stopped at the first '"', so there is no NUL in [s, send)
     and LEN is also the length of the copied name.  */
  len = (size_t) (send - s);
  fn = xmalloc (len + 1);
  memcpy (fn, s, len);
  fn[len] = '\0';

  free (rc_filename);
  rc_filename = fn;

  if (! initial_fn)
    {
      initial_fn = xmalloc (len + 1);
      strcpy (initial_fn, fn);
    }

  /* Allow the initial file, regardless of name.  Suppress all other
     files if they end in ".h" (this allows included "*.rc").  A name
     shorter than two characters cannot end in ".h"; checking the
     length first avoids reading before the start of FN.  */
  if (strcmp (initial_fn, fn) == 0
      || len < 2
      || strcmp (fn + len - 2, ".h") != 0)
    suppress_cpp_data = 0;
  else
    suppress_cpp_data = 1;
}
/* Allocate a buffer of LEN bytes and record it on the `strings' list
   so that rcparse_discard_strings can release it later.  Returns the
   new buffer, whose contents are uninitialized.  */
static char *
get_string (int len)
{
  struct alloc_string *node = xmalloc (sizeof (struct alloc_string));

  node->s = xmalloc (len);

  /* Push the new node on the front of the list.  */
  node->next = strings;
  strings = node;

  return node->s;
}
/* Handle a quoted string. The quotes are stripped. A pair of quotes
in a string is turned into a single quote. Adjacent strings
separated by whitespace are merged, as in C.

The raw token text is read from rclex_tok. The result is a
NUL-terminated buffer obtained from get_string; the number of
characters stored before the terminating NUL is written to *LEN
(the string may contain embedded NULs produced by escapes). */
static char *
handle_quotes (rc_uint_type *len)
{
const char *input = rclex_tok;
char *ret, *s;
const char *t;
int ch;
int num_xdigits;
/* Every construct handled below writes no more characters than it
reads, so the input length plus the NUL is sufficient. */
ret = get_string (strlen (input) + 1);
s = ret;
t = input;
/* Skip the opening quote. */
if (*t == '"')
++t;
while (*t != '\0')
{
if (*t == '\\')
{
++t;
switch (*t)
{
case '\0':
/* Leave T on the NUL so that the outer loop terminates. */
rcparse_warning ("backslash at end of string");
break;
case '\"':
rcparse_warning ("use \"\" to put \" in a string");
*s++ = '"';
++t;
break;
case 'a':
*s++ = ESCAPE_B; /* Strange, but true... */
++t;
break;
case 'b':
*s++ = ESCAPE_B;
++t;
break;
case 'f':
*s++ = ESCAPE_F;
++t;
break;
case 'n':
*s++ = ESCAPE_N;
++t;
break;
case 'r':
*s++ = ESCAPE_R;
++t;
break;
case 't':
*s++ = ESCAPE_T;
++t;
break;
case 'v':
*s++ = ESCAPE_V;
++t;
break;
case '\\':
*s++ = *t++;
break;
/* Octal escape: one to three octal digits. */
case '0': case '1': case '2': case '3':
case '4': case '5': case '6': case '7':
ch = *t - '0';
++t;
if (*t >= '0' && *t <= '7')
{
ch = (ch << 3) | (*t - '0');
++t;
if (*t >= '0' && *t <= '7')
{
ch = (ch << 3) | (*t - '0');
++t;
}
}
*s++ = ch;
break;
case 'x': case 'X':
++t;
ch = 0;
/* We only handle single byte chars here. Make sure
we finish an escape sequence like "\xB0ABC" after
the first two digits. */
num_xdigits = 2;
while (num_xdigits--)
{
if (*t >= '0' && *t <= '9')
ch = (ch << 4) | (*t - '0');
else if (*t >= 'a' && *t <= 'f')
ch = (ch << 4) | (*t - 'a' + 10);
else if (*t >= 'A' && *t <= 'F')
ch = (ch << 4) | (*t - 'A' + 10);
else
break;
++t;
}
*s++ = ch;
break;
default:
/* Keep both the backslash and the following character. */
rcparse_warning ("unrecognized escape sequence");
*s++ = '\\';
*s++ = *t++;
break;
}
}
else if (*t != '"')
*s++ = *t++;
/* From here on *T is a quote. A quote at the very end of the token
closes the string. */
else if (t[1] == '\0')
break;
/* Two quotes in a row stand for one literal quote. */
else if (t[1] == '"')
{
*s++ = '"';
t += 2;
}
else
{
/* A closing quote followed by more text: skip the whitespace
(counting newlines) up to the opening quote of the adjacent
string, which is merged into this one. */
++t;
if (! ISSPACE (*t))
rcparse_warning ("unexpected character after '\"'");
while (ISSPACE (*t))
{
if ((*t) == '\n')
++rc_lineno;
++t;
}
if (*t == '\0')
break;
assert (*t == '"');
++t;
}
}
*s = '\0';
*len = s - ret;
return ret;
}
/* Allocate room for LEN unichar elements.  The storage comes from
   get_string, so it is released by rcparse_discard_strings.  */
static unichar *
get_unistring (int len)
{
  char *raw = get_string (len * sizeof (unichar));

  return (unichar *) raw;
}
/* Handle a quoted unicode string.  The optional L prefix and the quotes
   are stripped.  A pair of quotes in a string is turned into a single
   quote.  Adjacent strings separated by whitespace are merged, as in C.

   The raw token text is read from rclex_tok.  The result is a
   zero-terminated unichar buffer obtained from get_unistring; the
   number of elements stored before the terminator is written to *LEN
   (the string may contain embedded zero elements produced by
   escapes).  */
static unichar *
handle_uniquotes (rc_uint_type *len)
{
  const char *input = rclex_tok;
  unichar *ret, *s;
  const char *t;
  int ch;
  int num_xdigits;

  /* Every construct handled below writes no more elements than it
     reads characters, so the input length plus the terminator is
     sufficient.  */
  ret = get_unistring (strlen (input) + 1);
  s = ret;
  t = input;

  /* Skip the opening L" or ".  */
  if ((*t == 'L' || *t == 'l') && t[1] == '"')
    t += 2;
  else if (*t == '"')
    ++t;

  while (*t != '\0')
    {
      if (*t == '\\')
	{
	  ++t;
	  switch (*t)
	    {
	    case '\0':
	      /* Leave T on the NUL so that the outer loop terminates.  */
	      rcparse_warning ("backslash at end of string");
	      break;

	    case '\"':
	      /* Emit the quote and step over it, as handle_quotes does.
		 Leaving T on the quote would make the code below treat
		 it as the end of the string.  */
	      rcparse_warning ("use \"\" to put \" in a string");
	      *s++ = '"';
	      ++t;
	      break;

	    case 'a':
	      *s++ = ESCAPE_B; /* Strange, but true...  */
	      ++t;
	      break;

	    case 'b':
	      *s++ = ESCAPE_B;
	      ++t;
	      break;

	    case 'f':
	      *s++ = ESCAPE_F;
	      ++t;
	      break;

	    case 'n':
	      *s++ = ESCAPE_N;
	      ++t;
	      break;

	    case 'r':
	      *s++ = ESCAPE_R;
	      ++t;
	      break;

	    case 't':
	      *s++ = ESCAPE_T;
	      ++t;
	      break;

	    case 'v':
	      *s++ = ESCAPE_V;
	      ++t;
	      break;

	    case '\\':
	      *s++ = (unichar) *t++;
	      break;

	    /* Octal escape: one to three octal digits.  */
	    case '0': case '1': case '2': case '3':
	    case '4': case '5': case '6': case '7':
	      ch = *t - '0';
	      ++t;
	      if (*t >= '0' && *t <= '7')
		{
		  ch = (ch << 3) | (*t - '0');
		  ++t;
		  if (*t >= '0' && *t <= '7')
		    {
		      ch = (ch << 3) | (*t - '0');
		      ++t;
		    }
		}
	      *s++ = (unichar) ch;
	      break;

	    case 'x': case 'X':
	      ++t;
	      ch = 0;
	      /* We only handle two byte chars here.  Make sure
		 we finish an escape sequence like "\xB0ABCDE" after
		 the first four digits.  */
	      num_xdigits = 4;
	      while (num_xdigits--)
		{
		  if (*t >= '0' && *t <= '9')
		    ch = (ch << 4) | (*t - '0');
		  else if (*t >= 'a' && *t <= 'f')
		    ch = (ch << 4) | (*t - 'a' + 10);
		  else if (*t >= 'A' && *t <= 'F')
		    ch = (ch << 4) | (*t - 'A' + 10);
		  else
		    break;
		  ++t;
		}
	      *s++ = (unichar) ch;
	      break;

	    default:
	      /* Keep both the backslash and the following character.  */
	      rcparse_warning ("unrecognized escape sequence");
	      *s++ = '\\';
	      *s++ = (unichar) *t++;
	      break;
	    }
	}
      else if (*t != '"')
	/* NOTE(review): *T is a plain char; if char is signed, bytes
	   above 0x7f are sign-extended by this conversion -- confirm
	   this is intended.  */
	*s++ = (unichar) *t++;
      /* From here on *T is a quote.  A quote at the very end of the
	 token closes the string.  */
      else if (t[1] == '\0')
	break;
      /* Two quotes in a row stand for one literal quote.  */
      else if (t[1] == '"')
	{
	  *s++ = '"';
	  t += 2;
	}
      else
	{
	  /* A closing quote followed by more text: skip the whitespace
	     (counting newlines) up to the opening quote of the
	     adjacent string, which is merged into this one.  */
	  ++t;
	  if (! ISSPACE (*t))
	    rcparse_warning ("unexpected character after '\"'");
	  while (ISSPACE (*t))
	    {
	      if ((*t) == '\n')
		++rc_lineno;
	      ++t;
	    }
	  if (*t == '\0')
	    break;
	  assert (*t == '"');
	  ++t;
	}
    }

  *s = '\0';
  *len = s - ret;
  return ret;
}
/* Release every buffer handed out by get_string, together with the
   list nodes that track them, and leave the list empty.  The parser
   calls this once it no longer needs the strings.  */
void
rcparse_discard_strings (void)
{
  struct alloc_string *node;
  struct alloc_string *following;

  for (node = strings; node != NULL; node = following)
    {
      free (node->s);
      /* Fetch the link before the node itself is released.  */
      following = node->next;
      free (node);
    }

  strings = NULL;
}
/* Enter rcdata mode. While it is active, yylex returns quoted strings
as SIZEDSTRING / SIZEDUNISTRING tokens that carry an explicit length,
instead of QUOTEDSTRING / QUOTEDUNISTRING. */
void
rcparse_rcdata (void)
{
rcdata_mode = 1;
}
/* Go back to normal mode from rcdata mode, so that yylex returns
quoted strings as QUOTEDSTRING / QUOTEDUNISTRING again. */
void
rcparse_normal (void)
{
rcdata_mode = 0;
}
/* Append CH to the current token text in rclex_tok and keep the text
   NUL-terminated.  A CH of -1 appends nothing; it only makes sure that
   the buffer exists and is terminated.

   The buffer is enlarged when it is full.  Its capacity is doubled
   each time, so building a long token (for example a long string
   literal) costs a number of byte copies linear in its length.  */
static void
rclex_tok_add_char (int ch)
{
  if (! rclex_tok || rclex_tok_max <= rclex_tok_pos)
    {
      size_t new_max = rclex_tok_max ? rclex_tok_max * 2 : 8;
      /* One extra byte for the terminating NUL.  The result is used
	 unchecked, like every other xmalloc call in this file.  */
      char *h = xmalloc (new_max + 1);

      if (rclex_tok)
	{
	  /* Copy the existing text including its terminating NUL.  */
	  memcpy (h, rclex_tok, rclex_tok_pos + 1);
	  free (rclex_tok);
	}
      else
	rclex_tok_pos = 0;
      rclex_tok_max = new_max;
      rclex_tok = h;
    }
  if (ch != -1)
    rclex_tok[rclex_tok_pos++] = (char) ch;
  rclex_tok[rclex_tok_pos] = 0;
}
/* Read the next input character, append it to the current token and
   return it.  A character pushed back by rclex_peekch is returned
   first; otherwise one byte is read from cpp_pipe.  NUL and carriage
   return bytes are skipped.  Returns -1 when there is no stream, at
   end of input, or on a read error.  */
static int
rclex_readch (void)
{
  int r = rclex_lastch;

  if (r != -1)
    rclex_lastch = -1;
  else
    {
      char ch;

      do
	{
	  if (! cpp_pipe || feof (cpp_pipe)
	      || fread (&ch, 1, 1, cpp_pipe) != 1)
	    {
	      /* R may still hold a NUL or CR skipped by an earlier
		 iteration; make sure the end of input is reported
		 rather than that byte.  */
	      r = -1;
	      break;
	    }
	  r = ((int) ch) & 0xff;
	}
      while (r == 0 || r == '\r');
    }
  rclex_tok_add_char (r);
  return r;
}
/* Return the next input character without consuming it, or -1 if
   there is none.  The character is fetched with rclex_readch, stored
   in rclex_lastch for the next read, and removed again from the end
   of the token text.  */
static int
rclex_peekch (void)
{
  int pending = rclex_lastch;

  /* A character is already waiting.  */
  if (pending != -1)
    return pending;

  pending = rclex_readch ();
  if (pending == -1)
    return -1;

  rclex_lastch = pending;
  /* rclex_readch appended the character to the token; take it off
     again since it has not really been consumed.  */
  if (rclex_tok_pos > 0)
    rclex_tok[--rclex_tok_pos] = 0;

  return pending;
}
/* Read the remainder of a quoted string into the token buffer; the
   opening quote has already been consumed by the caller.  Scanning
   stops, without consuming it, at a newline or at end of input.  A
   backslash and the character following it are taken as a pair.  After
   a quote, any whitespace is consumed too; if another quote follows,
   it is consumed and scanning continues, otherwise the token ends.  */
static void
rclex_string (void)
{
  for (;;)
    {
      int c = rclex_peekch ();

      if (c == -1 || c == '\n')
	return;

      if (c == '\\')
	{
	  /* Consume the backslash, then the escaped character unless
	     the line or the input ends right here.  */
	  rclex_readch ();
	  c = rclex_peekch ();
	  if (c == -1 || c == '\n')
	    return;
	  rclex_readch ();
	  continue;
	}

      if (rclex_readch () != '"')
	continue;

      /* PR 6714
	 Skip any whitespace after the end of the double quotes.  */
      for (;;)
	{
	  c = rclex_peekch ();
	  if (! ISSPACE (c))
	    break;
	  rclex_readch ();
	}

      if (rclex_peekch () != '"')
	return;
      rclex_readch ();
    }
}
static rc_uint_type
read_digit (int ch)
{
rc_uint_type base = 10;
rc_uint_type ret, val;
int warned = 0;
ret = 0;
if (ch == '0')
{
base = 8;
switch (rclex_peekch ())
{
case 'o': case 'O':
rclex_readch ();
base = 8;
break;
case 'x': case 'X':
rclex_readch ();
base = 16;
break;
}
}
else
ret = (rc_uint_type) (ch - '0');
while ((ch = rclex_peekch ()) != -1)
{
if (ISDIGIT (ch))
val = (rc_uint_type) (ch - '0');
else if (ch >= 'a' && ch <= 'f')
val = (rc_uint_type) ((ch - 'a') + 10);
else if (ch >= 'A' && ch <= 'F')
val = (rc_uint_type) ((ch - 'A') + 10);
else
break;
rclex_readch ();
if (! warned && val >= base)
{
warned = 1;
rcparse_warning ("digit exceeds base");
}
ret *= base;
ret += val;
}
return ret;
}
/* yyparser entry method.

Scan the next token from the preprocessed input and return its token
code, or -1 at end of input. Token values are passed in yylval.
Tokens for which IGNORED_TOKEN results (preprocessor lines, and
everything scanned while suppress_cpp_data is set) are skipped and
scanning continues with the next token. */
int
yylex (void)
{
char *s;
unichar *us;
rc_uint_type length;
int ch;
/* Make sure that rclex_tok is initialized. */
if (! rclex_tok)
rclex_tok_add_char (-1);
do
{
/* Skip blanks and control characters, counting newlines. */
do
{
/* Clear token. */
rclex_tok_pos = 0;
rclex_tok[0] = 0;
if ((ch = rclex_readch ()) == -1)
return -1;
if (ch == '\n')
++rc_lineno;
}
while (ch <= 0x20);
switch (ch)
{
case '#':
/* Collect the rest of the line, leaving the newline unread, and
hand it to cpp_line. The line itself never yields a token. */
while ((ch = rclex_peekch ()) != -1 && ch != '\n')
rclex_readch ();
cpp_line ();
ch = IGNORED_TOKEN;
break;
case '{':
ch = IGNORE_CPP (BEG);
break;
case '}':
ch = IGNORE_CPP (END);
break;
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
yylval.i.val = read_digit (ch);
yylval.i.dword = 0;
/* An 'l' or 'L' suffix marks the number as a dword. */
switch (rclex_peekch ())
{
case 'l': case 'L':
rclex_readch ();
yylval.i.dword = 1;
break;
}
ch = IGNORE_CPP (NUMBER);
break;
case '"':
rclex_string ();
ch = IGNORE_CPP ((! rcdata_mode ? QUOTEDSTRING : SIZEDSTRING));
/* Do not bother decoding a string that is going to be ignored. */
if (ch == IGNORED_TOKEN)
break;
s = handle_quotes (&length);
if (! rcdata_mode)
yylval.s = s;
else
{
yylval.ss.length = length;
yylval.ss.s = s;
}
break;
case 'L': case 'l':
/* An 'L' directly followed by a quote starts a unicode string;
otherwise it is the start of an identifier. */
if (rclex_peekch () == '"')
{
rclex_readch ();
rclex_string ();
ch = IGNORE_CPP ((! rcdata_mode ? QUOTEDUNISTRING : SIZEDUNISTRING));
if (ch == IGNORED_TOKEN)
break;
us = handle_uniquotes (&length);
if (! rcdata_mode)
yylval.uni = us;
else
{
yylval.suni.length = length;
yylval.suni.s = us;
}
break;
}
/* Fall through. */
default:
if (ISIDST (ch) || ch=='$')
{
/* Identifier or keyword. Besides identifier characters, the
characters '$', '.', ':', '\\', '/', '_' and '-' are accepted
as part of the word. */
while ((ch = rclex_peekch ()) != -1
&& (ISIDNUM (ch) || ch == '$' || ch == '.'
|| ch == ':' || ch == '\\' || ch == '/'
|| ch == '_' || ch == '-')
)
rclex_readch ();
ch = IGNORE_CPP (rclex_translatekeyword (rclex_tok));
if (ch == STRING)
{
/* Not a keyword: return a copy of the word as a STRING. */
s = get_string (strlen (rclex_tok) + 1);
strcpy (s, rclex_tok);
yylval.s = s;
}
else if (ch == BLOCK)
{
/* Look at the token after BLOCK by calling yylex recursively.
If it is the string "StringFileInfo" or "VarFileInfo", both
tokens are returned as a single BLOCKSTRINGFILEINFO or
BLOCKVARFILEINFO token. Any other string leaves BLOCK as the
result with the string in yylval.s. */
const char *hs = NULL;
switch (yylex ())
{
case STRING:
case QUOTEDSTRING:
hs = yylval.s;
break;
case SIZEDSTRING:
hs = yylval.s = yylval.ss.s;
break;
}
if (! hs)
{
rcparse_warning ("BLOCK expects a string as argument.");
ch = IGNORED_TOKEN;
}
else if (! strcmp (hs, "StringFileInfo"))
ch = BLOCKSTRINGFILEINFO;
else if (! strcmp (hs, "VarFileInfo"))
ch = BLOCKVARFILEINFO;
}
break;
}
/* Any other character is returned as its own token code. */
ch = IGNORE_CPP (ch);
break;
}
}
while (ch == IGNORED_TOKEN);
return ch;
}