mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-03-10 02:36:56 +08:00
c-lex.c (lex_charconst): Convert into a simple wrapper around cpp_interpret_charconst...
* c-lex.c (lex_charconst): Convert into a simple wrapper around cpp_interpret_charconst, to which most of the code body is moved. * cppexp.c (MAX_CHAR_TYPE_SIZE, MAX_WCHAR_TYPE_SIZE, MAX_LONG_TYPE_SIZE, MAX_INT_TYPE_SIZE, MAX_CHAR_TYPE_MASK, MAX_WCHAR_TYPE_MASK, parse_escape, parse_charconst): Remove. (lex): Use cpp_interpret_charconst. * cpplex.c (parse_escape, read_ucs, cpp_interpret_charconst, hex_digit_value): New functions. (MAX_CHAR_TYPE_SIZE, MAX_WCHAR_TYPE_SIZE): New macros. * cpplib.h (cpp_interpret_charconst): New prototype. * Makefile.in: Update. From-SVN: r41978
This commit is contained in:
parent
f42974dc81
commit
c8a9607017
@ -1,3 +1,18 @@
|
||||
2001-05-12 Neil Booth <neil@daikokuya.demon.co.uk>
|
||||
|
||||
* c-lex.c (lex_charconst): Convert into a simple wrapper
|
||||
around cpp_interpret_charconst, to which most of the code
|
||||
body is moved.
|
||||
* cppexp.c (MAX_CHAR_TYPE_SIZE, MAX_WCHAR_TYPE_SIZE,
|
||||
MAX_LONG_TYPE_SIZE, MAX_INT_TYPE_SIZE, MAX_CHAR_TYPE_MASK,
|
||||
MAX_WCHAR_TYPE_MASK, parse_escape, parse_charconst): Remove.
|
||||
(lex): Use cpp_interpret_charconst.
|
||||
* cpplex.c (parse_escape, read_ucs, cpp_interpret_charconst,
|
||||
hex_digit_value): New functions.
|
||||
(MAX_CHAR_TYPE_SIZE, MAX_WCHAR_TYPE_SIZE): New macros.
|
||||
* cpplib.h (cpp_interpret_charconst): New prototype.
|
||||
* Makefile.in: Update.
|
||||
|
||||
2001-05-12 Dean Wakerley <dean@wakerley.com>
|
||||
|
||||
* doc/install.texi: New file. Converted to texinfo from the HTML
|
||||
|
@ -1955,7 +1955,7 @@ cppmain.o: cppmain.c $(CONFIG_H) cpplib.h intl.h $(SYSTEM_H)
|
||||
|
||||
cpperror.o: cpperror.c $(CONFIG_H) $(LIBCPP_DEPS)
|
||||
cppexp.o: cppexp.c $(CONFIG_H) $(LIBCPP_DEPS)
|
||||
cpplex.o: cpplex.c $(CONFIG_H) $(LIBCPP_DEPS)
|
||||
cpplex.o: cpplex.c $(CONFIG_H) $(LIBCPP_DEPS) mbchar.h
|
||||
cppmacro.o: cppmacro.c $(CONFIG_H) $(LIBCPP_DEPS)
|
||||
cpplib.o: cpplib.c $(CONFIG_H) $(LIBCPP_DEPS) $(OBSTACK_H)
|
||||
cpphash.o: cpphash.c $(CONFIG_H) $(LIBCPP_DEPS) $(OBSTACK_H)
|
||||
|
130
gcc/c-lex.c
130
gcc/c-lex.c
@ -86,7 +86,7 @@ static const char *read_ucs PARAMS ((const char *, const char *,
|
||||
static void parse_float PARAMS ((PTR));
|
||||
static tree lex_number PARAMS ((const char *, unsigned int));
|
||||
static tree lex_string PARAMS ((const char *, unsigned int, int));
|
||||
static tree lex_charconst PARAMS ((const char *, unsigned int, int));
|
||||
static tree lex_charconst PARAMS ((const cpp_token *));
|
||||
static void update_header_times PARAMS ((const char *));
|
||||
static int dump_one_header PARAMS ((splay_tree_node, void *));
|
||||
static void cb_ident PARAMS ((cpp_reader *, const cpp_string *));
|
||||
@ -1008,8 +1008,7 @@ c_lex (value)
|
||||
|
||||
case CPP_CHAR:
|
||||
case CPP_WCHAR:
|
||||
*value = lex_charconst ((const char *)tok.val.str.text,
|
||||
tok.val.str.len, tok.type == CPP_WCHAR);
|
||||
*value = lex_charconst (&tok);
|
||||
break;
|
||||
|
||||
case CPP_STRING:
|
||||
@ -1607,113 +1606,36 @@ lex_string (str, len, wide)
|
||||
return value;
|
||||
}
|
||||
|
||||
/* Converts a (possibly wide) character constant token into a tree. */
|
||||
static tree
|
||||
lex_charconst (str, len, wide)
|
||||
const char *str;
|
||||
unsigned int len;
|
||||
int wide;
|
||||
lex_charconst (token)
|
||||
const cpp_token *token;
|
||||
{
|
||||
const char *limit = str + len;
|
||||
int result = 0;
|
||||
int num_chars = 0;
|
||||
int chars_seen = 0;
|
||||
unsigned width = TYPE_PRECISION (char_type_node);
|
||||
int max_chars;
|
||||
unsigned int c;
|
||||
HOST_WIDE_INT result;
|
||||
tree value;
|
||||
|
||||
#ifdef MULTIBYTE_CHARS
|
||||
int longest_char = local_mb_cur_max ();
|
||||
(void) local_mbtowc (NULL, NULL, 0);
|
||||
#endif
|
||||
|
||||
max_chars = TYPE_PRECISION (integer_type_node) / width;
|
||||
if (wide)
|
||||
width = WCHAR_TYPE_SIZE;
|
||||
|
||||
while (str < limit)
|
||||
{
|
||||
#ifdef MULTIBYTE_CHARS
|
||||
wchar_t wc;
|
||||
int char_len;
|
||||
|
||||
char_len = local_mbtowc (&wc, str, limit - str);
|
||||
if (char_len == -1)
|
||||
{
|
||||
warning ("Ignoring invalid multibyte character");
|
||||
char_len = 1;
|
||||
c = *str++;
|
||||
}
|
||||
else
|
||||
{
|
||||
str += char_len;
|
||||
c = wc;
|
||||
}
|
||||
#else
|
||||
c = *str++;
|
||||
#endif
|
||||
|
||||
++chars_seen;
|
||||
if (c == '\\')
|
||||
{
|
||||
str = readescape (str, limit, &c);
|
||||
if (width < HOST_BITS_PER_INT
|
||||
&& (unsigned) c >= ((unsigned)1 << width))
|
||||
pedwarn ("escape sequence out of range for character");
|
||||
}
|
||||
#ifdef MAP_CHARACTER
|
||||
if (ISPRINT (c))
|
||||
c = MAP_CHARACTER (c);
|
||||
#endif
|
||||
|
||||
/* Merge character into result; ignore excess chars. */
|
||||
num_chars += (width / TYPE_PRECISION (char_type_node));
|
||||
if (num_chars < max_chars + 1)
|
||||
{
|
||||
if (width < HOST_BITS_PER_INT)
|
||||
result = (result << width) | (c & ((1 << width) - 1));
|
||||
else
|
||||
result = c;
|
||||
}
|
||||
}
|
||||
|
||||
if (chars_seen == 0)
|
||||
error ("empty character constant");
|
||||
else if (num_chars > max_chars)
|
||||
{
|
||||
num_chars = max_chars;
|
||||
error ("character constant too long");
|
||||
}
|
||||
else if (chars_seen != 1 && ! flag_traditional && warn_multichar)
|
||||
warning ("multi-character character constant");
|
||||
|
||||
/* If char type is signed, sign-extend the constant. */
|
||||
if (! wide)
|
||||
{
|
||||
int num_bits = num_chars * width;
|
||||
if (num_bits == 0)
|
||||
/* We already got an error; avoid invalid shift. */
|
||||
value = build_int_2 (0, 0);
|
||||
else if (TREE_UNSIGNED (char_type_node)
|
||||
|| ((result >> (num_bits - 1)) & 1) == 0)
|
||||
value = build_int_2 (result & (~(unsigned HOST_WIDE_INT) 0
|
||||
>> (HOST_BITS_PER_WIDE_INT - num_bits)),
|
||||
0);
|
||||
else
|
||||
value = build_int_2 (result | ~(~(unsigned HOST_WIDE_INT) 0
|
||||
>> (HOST_BITS_PER_WIDE_INT - num_bits)),
|
||||
-1);
|
||||
/* In C, a character constant has type 'int'; in C++, 'char'. */
|
||||
if (chars_seen <= 1 && c_language == clk_cplusplus)
|
||||
TREE_TYPE (value) = char_type_node;
|
||||
else
|
||||
TREE_TYPE (value) = integer_type_node;
|
||||
}
|
||||
else
|
||||
unsigned int chars_seen;
|
||||
|
||||
result = cpp_interpret_charconst (parse_in, token, warn_multichar,
|
||||
flag_traditional, &chars_seen);
|
||||
if (token->type == CPP_WCHAR)
|
||||
{
|
||||
value = build_int_2 (result, 0);
|
||||
TREE_TYPE (value) = wchar_type_node;
|
||||
}
|
||||
|
||||
else
|
||||
{
|
||||
if (result < 0)
|
||||
value = build_int_2 (result, -1);
|
||||
else
|
||||
value = build_int_2 (result, 0);
|
||||
|
||||
/* In C, a character constant has type 'int'.
|
||||
In C++ 'char', but multi-char charconsts have type 'int'. */
|
||||
if (c_language == clk_cplusplus && chars_seen <= 1)
|
||||
TREE_TYPE (value) = char_type_node;
|
||||
else
|
||||
TREE_TYPE (value) = integer_type_node;
|
||||
}
|
||||
|
||||
return value;
|
||||
}
|
||||
|
210
gcc/cppexp.c
210
gcc/cppexp.c
@ -18,37 +18,11 @@ along with this program; if not, write to the Free Software
|
||||
Foundation, 59 Temple Place - Suite 330,
|
||||
Boston, MA 02111-1307, USA. */
|
||||
|
||||
/* Parse a C expression from text in a string */
|
||||
|
||||
#include "config.h"
|
||||
#include "system.h"
|
||||
#include "cpplib.h"
|
||||
#include "cpphash.h"
|
||||
|
||||
#ifndef MAX_CHAR_TYPE_SIZE
|
||||
#define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
|
||||
#endif
|
||||
|
||||
#ifndef MAX_INT_TYPE_SIZE
|
||||
#define MAX_INT_TYPE_SIZE INT_TYPE_SIZE
|
||||
#endif
|
||||
|
||||
#ifndef MAX_LONG_TYPE_SIZE
|
||||
#define MAX_LONG_TYPE_SIZE LONG_TYPE_SIZE
|
||||
#endif
|
||||
|
||||
#ifndef MAX_WCHAR_TYPE_SIZE
|
||||
#define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
|
||||
#endif
|
||||
|
||||
#define MAX_CHAR_TYPE_MASK (MAX_CHAR_TYPE_SIZE < HOST_BITS_PER_WIDEST_INT \
|
||||
? (~(~(HOST_WIDEST_INT) 0 << MAX_CHAR_TYPE_SIZE)) \
|
||||
: ~ (HOST_WIDEST_INT) 0)
|
||||
|
||||
#define MAX_WCHAR_TYPE_MASK (MAX_WCHAR_TYPE_SIZE < HOST_BITS_PER_WIDEST_INT \
|
||||
? ~(~(HOST_WIDEST_INT) 0 << MAX_WCHAR_TYPE_SIZE) \
|
||||
: ~ (HOST_WIDEST_INT) 0)
|
||||
|
||||
/* Yield nonzero if adding two numbers with A's and B's signs can yield a
|
||||
number with SUM's sign, where A, B, and SUM are all C integers. */
|
||||
#define possible_sum_sign(a, b, sum) ((((a) ^ (b)) | ~ ((a) ^ (sum))) < 0)
|
||||
@ -61,10 +35,7 @@ static HOST_WIDEST_INT right_shift PARAMS ((cpp_reader *, HOST_WIDEST_INT,
|
||||
unsigned int,
|
||||
unsigned HOST_WIDEST_INT));
|
||||
static struct op parse_number PARAMS ((cpp_reader *, const cpp_token *));
|
||||
static struct op parse_charconst PARAMS ((cpp_reader *, const cpp_token *));
|
||||
static struct op parse_defined PARAMS ((cpp_reader *));
|
||||
static HOST_WIDEST_INT parse_escape PARAMS ((cpp_reader *, const U_CHAR **,
|
||||
const U_CHAR *, HOST_WIDEST_INT));
|
||||
static struct op lex PARAMS ((cpp_reader *, int, cpp_token *));
|
||||
static const unsigned char *op_as_text PARAMS ((cpp_reader *, enum cpp_ttype));
|
||||
|
||||
@ -238,81 +209,6 @@ parse_number (pfile, tok)
|
||||
return op;
|
||||
}
|
||||
|
||||
/* Parse and convert a character constant for #if. Understands backslash
|
||||
escapes (\n, \031) and multibyte characters (if so configured). */
|
||||
static struct op
|
||||
parse_charconst (pfile, tok)
|
||||
cpp_reader *pfile;
|
||||
const cpp_token *tok;
|
||||
{
|
||||
struct op op;
|
||||
HOST_WIDEST_INT result = 0;
|
||||
int num_chars = 0;
|
||||
int num_bits;
|
||||
unsigned int width = MAX_CHAR_TYPE_SIZE;
|
||||
HOST_WIDEST_INT mask = MAX_CHAR_TYPE_MASK;
|
||||
int max_chars;
|
||||
const U_CHAR *ptr = tok->val.str.text;
|
||||
const U_CHAR *end = ptr + tok->val.str.len;
|
||||
|
||||
int c = -1;
|
||||
|
||||
if (tok->type == CPP_WCHAR)
|
||||
width = MAX_WCHAR_TYPE_SIZE, mask = MAX_WCHAR_TYPE_MASK;
|
||||
max_chars = MAX_LONG_TYPE_SIZE / width;
|
||||
|
||||
while (ptr < end)
|
||||
{
|
||||
c = *ptr++;
|
||||
if (c == '\'')
|
||||
CPP_ICE ("unescaped ' in character constant");
|
||||
else if (c == '\\')
|
||||
{
|
||||
c = parse_escape (pfile, &ptr, end, mask);
|
||||
if (width < HOST_BITS_PER_INT
|
||||
&& (unsigned int) c >= (unsigned int)(1 << width))
|
||||
cpp_pedwarn (pfile,
|
||||
"escape sequence out of range for character");
|
||||
}
|
||||
|
||||
/* Merge character into result; ignore excess chars. */
|
||||
if (++num_chars <= max_chars)
|
||||
{
|
||||
if (width < HOST_BITS_PER_INT)
|
||||
result = (result << width) | (c & ((1 << width) - 1));
|
||||
else
|
||||
result = c;
|
||||
}
|
||||
}
|
||||
|
||||
if (num_chars == 0)
|
||||
SYNTAX_ERROR ("empty character constant");
|
||||
else if (num_chars > max_chars)
|
||||
SYNTAX_ERROR ("character constant too long");
|
||||
else if (num_chars != 1)
|
||||
cpp_warning (pfile, "multi-character character constant");
|
||||
|
||||
/* If char type is signed, sign-extend the constant. */
|
||||
num_bits = num_chars * width;
|
||||
|
||||
if (pfile->spec_nodes.n__CHAR_UNSIGNED__->type == NT_MACRO
|
||||
|| ((result >> (num_bits - 1)) & 1) == 0)
|
||||
op.value = result & ((unsigned HOST_WIDEST_INT) ~0
|
||||
>> (HOST_BITS_PER_WIDEST_INT - num_bits));
|
||||
else
|
||||
op.value = result | ~((unsigned HOST_WIDEST_INT) ~0
|
||||
>> (HOST_BITS_PER_WIDEST_INT - num_bits));
|
||||
|
||||
/* This is always a signed type. */
|
||||
op.unsignedp = 0;
|
||||
op.op = CPP_INT;
|
||||
return op;
|
||||
|
||||
syntax_error:
|
||||
op.op = CPP_ERROR;
|
||||
return op;
|
||||
}
|
||||
|
||||
static struct op
|
||||
parse_defined (pfile)
|
||||
cpp_reader *pfile;
|
||||
@ -405,7 +301,15 @@ lex (pfile, skip_evaluation, token)
|
||||
|
||||
case CPP_CHAR:
|
||||
case CPP_WCHAR:
|
||||
return parse_charconst (pfile, token);
|
||||
{
|
||||
unsigned int chars_seen;
|
||||
|
||||
/* This is always a signed type. */
|
||||
op.unsignedp = 0;
|
||||
op.op = CPP_INT;
|
||||
op.value = cpp_interpret_charconst (pfile, token, 1, 0, &chars_seen);
|
||||
return op;
|
||||
}
|
||||
|
||||
case CPP_STRING:
|
||||
case CPP_WSTRING:
|
||||
@ -494,102 +398,6 @@ lex (pfile, skip_evaluation, token)
|
||||
return op;
|
||||
}
|
||||
|
||||
/* Parse a C escape sequence. STRING_PTR points to a variable
|
||||
containing a pointer to the string to parse. That pointer
|
||||
is updated past the characters we use. The value of the
|
||||
escape sequence is returned.
|
||||
|
||||
If \ is followed by 000, we return 0 and leave the string pointer
|
||||
after the zeros. A value of 0 does not mean end of string. */
|
||||
|
||||
static HOST_WIDEST_INT
|
||||
parse_escape (pfile, string_ptr, limit, result_mask)
|
||||
cpp_reader *pfile;
|
||||
const U_CHAR **string_ptr;
|
||||
const U_CHAR *limit;
|
||||
HOST_WIDEST_INT result_mask;
|
||||
{
|
||||
const U_CHAR *ptr = *string_ptr;
|
||||
/* We know we have at least one following character. */
|
||||
int c = *ptr++;
|
||||
switch (c)
|
||||
{
|
||||
case 'a': c = TARGET_BELL; break;
|
||||
case 'b': c = TARGET_BS; break;
|
||||
case 'f': c = TARGET_FF; break;
|
||||
case 'n': c = TARGET_NEWLINE; break;
|
||||
case 'r': c = TARGET_CR; break;
|
||||
case 't': c = TARGET_TAB; break;
|
||||
case 'v': c = TARGET_VT; break;
|
||||
|
||||
case 'e': case 'E':
|
||||
if (CPP_PEDANTIC (pfile))
|
||||
cpp_pedwarn (pfile, "non-ISO-standard escape sequence, '\\%c'", c);
|
||||
c = TARGET_ESC;
|
||||
break;
|
||||
|
||||
case '0': case '1': case '2': case '3':
|
||||
case '4': case '5': case '6': case '7':
|
||||
{
|
||||
unsigned int i = c - '0';
|
||||
int count = 0;
|
||||
while (++count < 3)
|
||||
{
|
||||
if (ptr >= limit)
|
||||
break;
|
||||
|
||||
c = *ptr;
|
||||
if (c < '0' || c > '7')
|
||||
break;
|
||||
ptr++;
|
||||
i = (i << 3) + c - '0';
|
||||
}
|
||||
if (i != (i & result_mask))
|
||||
{
|
||||
i &= result_mask;
|
||||
cpp_pedwarn (pfile, "octal escape sequence out of range");
|
||||
}
|
||||
c = i;
|
||||
break;
|
||||
}
|
||||
|
||||
case 'x':
|
||||
{
|
||||
unsigned int i = 0, overflow = 0;
|
||||
int digits_found = 0, digit;
|
||||
for (;;)
|
||||
{
|
||||
if (ptr >= limit)
|
||||
break;
|
||||
c = *ptr;
|
||||
if (c >= '0' && c <= '9')
|
||||
digit = c - '0';
|
||||
else if (c >= 'a' && c <= 'f')
|
||||
digit = c - 'a' + 10;
|
||||
else if (c >= 'A' && c <= 'F')
|
||||
digit = c - 'A' + 10;
|
||||
else
|
||||
break;
|
||||
ptr++;
|
||||
overflow |= i ^ (i << 4 >> 4);
|
||||
i = (i << 4) + digit;
|
||||
digits_found = 1;
|
||||
}
|
||||
if (!digits_found)
|
||||
cpp_error (pfile, "\\x used with no following hex digits");
|
||||
if (overflow | (i != (i & result_mask)))
|
||||
{
|
||||
i &= result_mask;
|
||||
cpp_pedwarn (pfile, "hex escape sequence out of range");
|
||||
}
|
||||
c = i;
|
||||
break;
|
||||
}
|
||||
}
|
||||
*string_ptr = ptr;
|
||||
return c;
|
||||
}
|
||||
|
||||
static void
|
||||
integer_overflow (pfile)
|
||||
cpp_reader *pfile;
|
||||
|
@ -391,7 +391,7 @@ macro is defined when we leave the header file for the first time. If
|
||||
the host supports it, we try to map suitably large files into memory,
|
||||
rather than reading them in directly.
|
||||
|
||||
The include paths are intenally stored on a null-terminated
|
||||
The include paths are internally stored on a null-terminated
|
||||
singly-linked list, starting with the @code{"header.h"} directory search
|
||||
chain, which then links into the @code{<header.h>} directory chain.
|
||||
|
||||
|
349
gcc/cpplex.c
349
gcc/cpplex.c
@ -39,6 +39,18 @@ Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
|
||||
#include "cpplib.h"
|
||||
#include "cpphash.h"
|
||||
|
||||
/* MULTIBYTE_CHARS support only works for native compilers.
|
||||
??? Ideally what we want is to model widechar support after
|
||||
the current floating point support. */
|
||||
#ifdef CROSS_COMPILE
|
||||
#undef MULTIBYTE_CHARS
|
||||
#endif
|
||||
|
||||
#ifdef MULTIBYTE_CHARS
|
||||
#include "mbchar.h"
|
||||
#include <locale.h>
|
||||
#endif
|
||||
|
||||
/* Tokens with SPELL_STRING store their spelling in the token list,
|
||||
and its length in the token->val.name.len. */
|
||||
enum spell_type
|
||||
@ -86,9 +98,15 @@ static void save_comment PARAMS ((cpp_reader *, cpp_token *, const U_CHAR *));
|
||||
static void lex_percent PARAMS ((cpp_buffer *, cpp_token *));
|
||||
static void lex_dot PARAMS ((cpp_reader *, cpp_token *));
|
||||
static int name_p PARAMS ((cpp_reader *, const cpp_string *));
|
||||
static unsigned int parse_escape PARAMS ((cpp_reader *, const unsigned char **,
|
||||
const unsigned char *, HOST_WIDE_INT,
|
||||
int));
|
||||
static unsigned int read_ucs PARAMS ((cpp_reader *, const unsigned char **,
|
||||
const unsigned char *, unsigned int));
|
||||
|
||||
static cpp_chunk *new_chunk PARAMS ((unsigned int));
|
||||
static int chunk_suitable PARAMS ((cpp_pool *, cpp_chunk *, unsigned int));
|
||||
static unsigned int hex_digit_value PARAMS ((unsigned int));
|
||||
|
||||
/* Utility routine:
|
||||
|
||||
@ -1640,6 +1658,337 @@ cpp_output_line (pfile, fp)
|
||||
putc ('\n', fp);
|
||||
}
|
||||
|
||||
/* Map the hexadecimal digit character C ('0'-'9', 'A'-'F' or
   'a'-'f') to its numeric value in the range 0-15.  Callers are
   expected to have checked C already; any other character aborts.  */
static unsigned int
hex_digit_value (c)
     unsigned int c;
{
  if (c >= '0' && c <= '9')
    return c - '0';
  else if (c >= 'A' && c <= 'F')
    return 10 + (c - 'A');
  else if (c >= 'a' && c <= 'f')
    return 10 + (c - 'a');

  /* Not a hexadecimal digit.  */
  abort ();
}
|
||||
|
||||
/* Parse a '\uNNNN' or '\UNNNNNNNN' sequence (C++ and C99).
|
||||
|
||||
[lex.charset]: The character designated by the universal character
|
||||
name \UNNNNNNNN is that character whose character short name in
|
||||
ISO/IEC 10646 is NNNNNNNN; the character designated by the
|
||||
universal character name \uNNNN is that character whose character
|
||||
short name in ISO/IEC 10646 is 0000NNNN. If the hexadecimal value
|
||||
for a universal character name is less than 0x20 or in the range
|
||||
0x7F-0x9F (inclusive), or if the universal character name
|
||||
designates a character in the basic source character set, then the
|
||||
program is ill-formed.
|
||||
|
||||
We assume that wchar_t is Unicode, so we don't need to do any
|
||||
mapping. Is this ever wrong? */
|
||||
|
||||
static unsigned int
|
||||
read_ucs (pfile, pstr, limit, length)
|
||||
cpp_reader *pfile;
|
||||
const unsigned char **pstr;
|
||||
const unsigned char *limit;
|
||||
unsigned int length;
|
||||
{
|
||||
const unsigned char *p = *pstr;
|
||||
unsigned int c, code = 0;
|
||||
|
||||
for (; length; --length)
|
||||
{
|
||||
if (p >= limit)
|
||||
{
|
||||
cpp_error (pfile, "incomplete universal-character-name");
|
||||
break;
|
||||
}
|
||||
|
||||
c = *p;
|
||||
if (ISXDIGIT (c))
|
||||
{
|
||||
code = (code << 4) + hex_digit_value (c);
|
||||
p++;
|
||||
}
|
||||
else
|
||||
{
|
||||
cpp_error (pfile,
|
||||
"non-hex digit '%c' in universal-character-name", c);
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#ifdef TARGET_EBCDIC
|
||||
cpp_error (pfile, "universal-character-name on EBCDIC target");
|
||||
code = 0x3f; /* EBCDIC invalid character */
|
||||
#else
|
||||
if (code > 0x9f && !(code & 0x80000000))
|
||||
; /* True extended character, OK. */
|
||||
else if (code >= 0x20 && code < 0x7f)
|
||||
{
|
||||
/* ASCII printable character. The C character set consists of all of
|
||||
these except $, @ and `. We use hex escapes so that this also
|
||||
works with EBCDIC hosts. */
|
||||
if (code != 0x24 && code != 0x40 && code != 0x60)
|
||||
cpp_error (pfile, "universal-character-name used for '%c'", code);
|
||||
}
|
||||
else
|
||||
cpp_error (pfile, "invalid universal-character-name");
|
||||
#endif
|
||||
|
||||
*pstr = p;
|
||||
return code;
|
||||
}
|
||||
|
||||
/* Interpret an escape sequence, and return its value. PSTR points to
|
||||
the input pointer, which is just after the backslash. LIMIT is how
|
||||
much text we have. MASK is the precision for the target type (char
|
||||
or wchar_t). TRADITIONAL, if true, does not interpret escapes that
|
||||
did not exist in traditional C. */
|
||||
|
||||
static unsigned int
|
||||
parse_escape (pfile, pstr, limit, mask, traditional)
|
||||
cpp_reader *pfile;
|
||||
const unsigned char **pstr;
|
||||
const unsigned char *limit;
|
||||
HOST_WIDE_INT mask;
|
||||
int traditional;
|
||||
{
|
||||
int unknown = 0;
|
||||
const unsigned char *str = *pstr;
|
||||
unsigned int c = *str++;
|
||||
|
||||
switch (c)
|
||||
{
|
||||
case '\\': case '\'': case '"': case '?': break;
|
||||
case 'b': c = TARGET_BS; break;
|
||||
case 'f': c = TARGET_FF; break;
|
||||
case 'n': c = TARGET_NEWLINE; break;
|
||||
case 'r': c = TARGET_CR; break;
|
||||
case 't': c = TARGET_TAB; break;
|
||||
case 'v': c = TARGET_VT; break;
|
||||
|
||||
case '(': case '{': case '[': case '%':
|
||||
/* '\(', etc, are used at beginning of line to avoid confusing Emacs.
|
||||
'\%' is used to prevent SCCS from getting confused. */
|
||||
unknown = CPP_PEDANTIC (pfile);
|
||||
break;
|
||||
|
||||
case 'a':
|
||||
if (CPP_WTRADITIONAL (pfile))
|
||||
cpp_warning (pfile, "the meaning of '\\a' varies with -traditional");
|
||||
if (!traditional)
|
||||
c = TARGET_BELL;
|
||||
break;
|
||||
|
||||
case 'e': case 'E':
|
||||
if (CPP_PEDANTIC (pfile))
|
||||
cpp_pedwarn (pfile, "non-ISO-standard escape sequence, '\\%c'", c);
|
||||
c = TARGET_ESC;
|
||||
break;
|
||||
|
||||
/* Warnings and support checks handled by read_ucs(). */
|
||||
case 'u': case 'U':
|
||||
if (CPP_OPTION (pfile, cplusplus) || CPP_OPTION (pfile, c99))
|
||||
{
|
||||
if (CPP_WTRADITIONAL (pfile))
|
||||
cpp_warning (pfile,
|
||||
"the meaning of '\\%c' varies with -traditional", c);
|
||||
c = read_ucs (pfile, &str, limit, c == 'u' ? 4 : 8);
|
||||
}
|
||||
else
|
||||
unknown = 1;
|
||||
break;
|
||||
|
||||
case 'x':
|
||||
if (CPP_WTRADITIONAL (pfile))
|
||||
cpp_warning (pfile, "the meaning of '\\x' varies with -traditional");
|
||||
|
||||
if (!traditional)
|
||||
{
|
||||
unsigned int i = 0, overflow = 0;
|
||||
int digits_found = 0;
|
||||
|
||||
while (str < limit)
|
||||
{
|
||||
c = *str;
|
||||
if (! ISXDIGIT (c))
|
||||
break;
|
||||
str++;
|
||||
overflow |= i ^ (i << 4 >> 4);
|
||||
i = (i << 4) + hex_digit_value (c);
|
||||
digits_found = 1;
|
||||
}
|
||||
|
||||
if (!digits_found)
|
||||
cpp_error (pfile, "\\x used with no following hex digits");
|
||||
|
||||
if (overflow | (i != (i & mask)))
|
||||
{
|
||||
cpp_pedwarn (pfile, "hex escape sequence out of range");
|
||||
i &= mask;
|
||||
}
|
||||
c = i;
|
||||
}
|
||||
break;
|
||||
|
||||
case '0': case '1': case '2': case '3':
|
||||
case '4': case '5': case '6': case '7':
|
||||
{
|
||||
unsigned int i = c - '0';
|
||||
int count = 0;
|
||||
|
||||
while (str < limit && ++count < 3)
|
||||
{
|
||||
c = *str;
|
||||
if (c < '0' || c > '7')
|
||||
break;
|
||||
str++;
|
||||
i = (i << 3) + c - '0';
|
||||
}
|
||||
|
||||
if (i != (i & mask))
|
||||
{
|
||||
cpp_pedwarn (pfile, "octal escape sequence out of range");
|
||||
i &= mask;
|
||||
}
|
||||
c = i;
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
unknown = 1;
|
||||
break;
|
||||
}
|
||||
|
||||
if (unknown)
|
||||
{
|
||||
if (ISGRAPH (c))
|
||||
cpp_pedwarn (pfile, "unknown escape sequence '\\%c'", c);
|
||||
else
|
||||
cpp_pedwarn (pfile, "unknown escape sequence: '\\%03o'", c);
|
||||
}
|
||||
|
||||
*pstr = str;
|
||||
return c;
|
||||
}
|
||||
|
||||
#ifndef MAX_CHAR_TYPE_SIZE
|
||||
#define MAX_CHAR_TYPE_SIZE CHAR_TYPE_SIZE
|
||||
#endif
|
||||
|
||||
#ifndef MAX_WCHAR_TYPE_SIZE
|
||||
#define MAX_WCHAR_TYPE_SIZE WCHAR_TYPE_SIZE
|
||||
#endif
|
||||
|
||||
/* Interpret a (possibly wide) character constant in TOKEN.
|
||||
WARN_MULTI warns about multi-character charconsts, if not
|
||||
TRADITIONAL. TRADITIONAL also indicates not to interpret escapes
|
||||
that did not exist in traditional C. PCHARS_SEEN points to a
|
||||
variable that is filled in with the number of characters seen. */
|
||||
HOST_WIDE_INT
|
||||
cpp_interpret_charconst (pfile, token, warn_multi, traditional, pchars_seen)
|
||||
cpp_reader *pfile;
|
||||
const cpp_token *token;
|
||||
int warn_multi;
|
||||
int traditional;
|
||||
unsigned int *pchars_seen;
|
||||
{
|
||||
const unsigned char *str = token->val.str.text;
|
||||
const unsigned char *limit = str + token->val.str.len;
|
||||
unsigned int chars_seen = 0;
|
||||
unsigned int width, max_chars, c;
|
||||
HOST_WIDE_INT result = 0, mask;
|
||||
|
||||
#ifdef MULTIBYTE_CHARS
|
||||
(void) local_mbtowc (NULL, NULL, 0);
|
||||
#endif
|
||||
|
||||
/* Width in bits. */
|
||||
if (token->type == CPP_CHAR)
|
||||
width = MAX_CHAR_TYPE_SIZE;
|
||||
else
|
||||
width = MAX_WCHAR_TYPE_SIZE;
|
||||
|
||||
if (width < HOST_BITS_PER_WIDE_INT)
|
||||
mask = ((unsigned HOST_WIDE_INT) 1 << width) - 1;
|
||||
else
|
||||
mask = ~0;
|
||||
max_chars = HOST_BITS_PER_WIDE_INT / width;
|
||||
|
||||
while (str < limit)
|
||||
{
|
||||
#ifdef MULTIBYTE_CHARS
|
||||
wchar_t wc;
|
||||
int char_len;
|
||||
|
||||
char_len = local_mbtowc (&wc, str, limit - str);
|
||||
if (char_len == -1)
|
||||
{
|
||||
cpp_warning (pfile, "ignoring invalid multibyte character");
|
||||
c = *str++;
|
||||
}
|
||||
else
|
||||
{
|
||||
str += char_len;
|
||||
c = wc;
|
||||
}
|
||||
#else
|
||||
c = *str++;
|
||||
#endif
|
||||
|
||||
if (c == '\\')
|
||||
{
|
||||
c = parse_escape (pfile, &str, limit, mask, traditional);
|
||||
if (width < HOST_BITS_PER_WIDE_INT && c > mask)
|
||||
cpp_pedwarn (pfile, "escape sequence out of range for character");
|
||||
}
|
||||
|
||||
#ifdef MAP_CHARACTER
|
||||
if (ISPRINT (c))
|
||||
c = MAP_CHARACTER (c);
|
||||
#endif
|
||||
|
||||
/* Merge character into result; ignore excess chars. */
|
||||
if (++chars_seen <= max_chars)
|
||||
{
|
||||
if (width < HOST_BITS_PER_WIDE_INT)
|
||||
result = (result << width) | (c & mask);
|
||||
else
|
||||
result = c;
|
||||
}
|
||||
}
|
||||
|
||||
if (chars_seen == 0)
|
||||
cpp_error (pfile, "empty character constant");
|
||||
else if (chars_seen > max_chars)
|
||||
{
|
||||
chars_seen = max_chars;
|
||||
cpp_error (pfile, "character constant too long");
|
||||
}
|
||||
else if (chars_seen > 1 && !traditional && warn_multi)
|
||||
cpp_warning (pfile, "multi-character character constant");
|
||||
|
||||
/* If char type is signed, sign-extend the constant. The
|
||||
__CHAR_UNSIGNED__ macro is set by the driver if appropriate. */
|
||||
if (token->type == CPP_CHAR && chars_seen)
|
||||
{
|
||||
unsigned int nbits = chars_seen * width;
|
||||
unsigned int mask = (unsigned int) ~0 >> (HOST_BITS_PER_INT - nbits);
|
||||
|
||||
if (pfile->spec_nodes.n__CHAR_UNSIGNED__->type == NT_MACRO
|
||||
|| ((result >> (nbits - 1)) & 1) == 0)
|
||||
result &= mask;
|
||||
else
|
||||
result |= ~mask;
|
||||
}
|
||||
|
||||
*pchars_seen = chars_seen;
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Memory pools. */
|
||||
|
||||
struct dummy
|
||||
|
@ -542,6 +542,11 @@ extern const cpp_lexer_pos *cpp_get_line PARAMS ((cpp_reader *));
|
||||
extern const unsigned char *cpp_macro_definition PARAMS ((cpp_reader *,
|
||||
const cpp_hashnode *));
|
||||
|
||||
/* Evaluate a CPP_CHAR or CPP_WCHAR token. */
|
||||
extern HOST_WIDE_INT
|
||||
cpp_interpret_charconst PARAMS ((cpp_reader *, const cpp_token *,
|
||||
int, int, unsigned int *));
|
||||
|
||||
extern void cpp_define PARAMS ((cpp_reader *, const char *));
|
||||
extern void cpp_assert PARAMS ((cpp_reader *, const char *));
|
||||
extern void cpp_undef PARAMS ((cpp_reader *, const char *));
|
||||
|
Loading…
Reference in New Issue
Block a user