cpp.texi: Update for handling of charconsts.

* cpp.texi: Update for handling of charconsts.
	* cpplex.c (maybe_read_ucs): Don't accept D800-DFFF.  Update
	diagnostics.  Skip to the end if the UCS is too short.
	(cpp_interpret_charconst): Long charconsts issue a warning
	not an error.

	* gcc.dg/cpp/charconst.c: New tests.
	* gcc.dg/cpp/escape.c: New tests.
	* gcc.dg/cpp/escape-1.c: New tests.
	* gcc.dg/cpp/escape-2.c: New tests.
	* gcc.dg/cpp/ucs.c: New tests.

From-SVN: r42514
This commit is contained in:
Neil Booth 2001-05-23 22:50:28 +00:00 committed by Neil Booth
parent 75f3e3c8d6
commit f8710242d3
9 changed files with 207 additions and 42 deletions

View File

@ -1,3 +1,11 @@
2001-05-23 Neil Booth <neil@daikokuya.demon.co.uk>
* cpp.texi: Update for handling of charconsts.
* cpplex.c (maybe_read_ucs): Don't accept D800-DFFF. Update
diagnostics. Skip to the end if the UCS is too short.
(cpp_interpret_charconst): Long charconsts issue a warning
not an error.
2001-05-23 Richard Henderson <rth@redhat.com>
* doc/install.texi (alpha-linux): Require binutils 2.11.

View File

@ -2889,22 +2889,17 @@ same column as it did in the original source file.
@item The numeric value of character constants in preprocessor expressions.
The preprocessor interprets character constants in preprocessing
directives on the host machine. Expressions outside preprocessing
directives are compiled to be interpreted on the target machine. In the
normal case of a native compiler, these two environments are the same
and so character constants will be evaluated identically in both cases.
However, in the case of a cross compiler, the values may be different.
The preprocessor and compiler interpret character constants in the same
way; escape sequences such as @code{\a} are given the values they would
have on the target machine.
Multi-character character constants are interpreted a character at a
time, shifting the previous result left by the number of bits per
character on the host, and adding the new character. For example, 'ab'
on an 8-bit host would be interpreted as 'a' * 256 + 'b'. If there are
more characters in the constant than can fit in the widest native
integer type on the host, usually a @samp{long}, the behavior is
undefined.
Evaluation of wide character constants is not properly implemented yet.
integer type on the host, usually a @samp{long}, the excess characters
are ignored and a diagnostic is given.
@item Source file inclusion.

View File

@ -1706,46 +1706,48 @@ maybe_read_ucs (pfile, pstr, limit, pc)
if (CPP_WTRADITIONAL (pfile))
cpp_warning (pfile, "the meaning of '\\%c' varies with -traditional", c);
for (length = (c == 'u' ? 4: 8); length; --length)
length = (c == 'u' ? 4: 8);
if ((size_t) (limit - p) < length)
{
if (p >= limit)
cpp_error (pfile, "incomplete universal-character-name");
/* Skip to the end to avoid more diagnostics. */
p = limit;
}
else
{
for (; length; length--, p++)
{
cpp_error (pfile, "incomplete universal-character-name");
break;
c = *p;
if (ISXDIGIT (c))
code = (code << 4) + hex_digit_value (c);
else
{
cpp_error (pfile,
"non-hex digit '%c' in universal-character-name", c);
/* We shouldn't skip in case there are multibyte chars. */
break;
}
}
c = *p;
if (ISXDIGIT (c))
{
code = (code << 4) + hex_digit_value (c);
p++;
}
else
{
cpp_error (pfile,
"non-hex digit '%c' in universal-character-name", c);
break;
}
}
#ifdef TARGET_EBCDIC
cpp_error (pfile, "universal-character-name on EBCDIC target");
code = 0x3f; /* EBCDIC invalid character */
#else
if (code > 0x9f && !(code & 0x80000000))
; /* True extended character, OK. */
else if (code >= 0x20 && code < 0x7f)
{
/* ASCII printable character. The C character set consists of all of
these except $, @ and `. We use hex escapes so that this also
works with EBCDIC hosts. */
if (code != 0x24 && code != 0x40 && code != 0x60)
cpp_error (pfile, "universal-character-name used for '%c'", code);
}
else
cpp_error (pfile, "invalid universal-character-name");
/* True extended characters are OK. */
if (code >= 0xa0
&& !(code & 0x80000000)
&& !(code >= 0xD800 && code <= 0xDFFF))
;
/* The standard permits $, @ and ` to be specified as UCNs. We use
hex escapes so that this also works with EBCDIC hosts. */
else if (code == 0x24 || code == 0x40 || code == 0x60)
;
/* Don't give another error if one occurred above. */
else if (length == 0)
cpp_error (pfile, "universal-character-name out of range");
#endif
*pstr = p;
@ -1970,7 +1972,7 @@ cpp_interpret_charconst (pfile, token, warn_multi, traditional, pchars_seen)
else if (chars_seen > max_chars)
{
chars_seen = max_chars;
cpp_error (pfile, "character constant too long");
cpp_warning (pfile, "character constant too long");
}
else if (chars_seen > 1 && !traditional && warn_multi)
cpp_warning (pfile, "multi-character character constant");

View File

@ -1,3 +1,11 @@
2001-05-23 Neil Booth <neil@daikokuya.demon.co.uk>
* gcc.dg/cpp/charconst.c: New tests.
* gcc.dg/cpp/escape.c: New tests.
* gcc.dg/cpp/escape-1.c: New tests.
* gcc.dg/cpp/escape-2.c: New tests.
* gcc.dg/cpp/ucs.c: New tests.
2001-05-23 David.Billinghurst <David.Billinghurst@riotinto.com>
* gcc.misc-tests/linkage.exp: Pass appropriate flags to

View File

@ -0,0 +1,33 @@
/* Copyright (C) 2001 Free Software Foundation, Inc. */
/* { dg-do compile } */
/* This tests various diagnostics about character constants, for both
the preprocessor and the compiler.
Neil Booth, 22 May 2001. */
#if '' /* { dg-warning "empty" "empty charconst" } */
#endif
#if L'' /* { dg-warning "empty" "empty wide charconst" } */
#endif
#if 'very long' /* { dg-warning "too long" "long charconst" } */
#endif
#if L'very long' /* { dg-warning "too long" "long wide charconst" } */
#endif
/* Don't do this test for L'ab'; it depends upon sizeof (wchar_t). */
#if 'ab' /* { dg-warning "multi-char" "multi-character" } */
#endif
/* Exercise the same character-constant diagnostics as the #if
   directives in this file, but in ordinary C code, so that the
   compiler side is covered as well as the preprocessor.  Every
   statement is expected to draw the warning named in its dg-warning
   directive; the constants are deliberately malformed.  */
void foo ()
{
int c = ''; /* { dg-warning "empty" "empty charconst" } */
c = L''; /* { dg-warning "empty" "empty wide charconst" } */
c = 'very long'; /* { dg-warning "too long" "long charconst" } */
c = L'very long'; /* { dg-warning "too long" "long wide charconst" } */
/* Don't do this test for L'ab'; it depends upon sizeof (wchar_t). */
c = 'ab'; /* { dg-warning "multi-char" "multi-character" } */
}

View File

@ -0,0 +1,39 @@
/* Copyright (C) 2001 Free Software Foundation, Inc. */
/* { dg-do compile } */
/* This tests various diagnostics about escape sequences, for both
the preprocessor and the compiler.
Neil Booth, 22 May 2001. */
#if '\x' /* { dg-error "no following" "\x with no digits" } */
#endif
#if '\x400' /* { dg-warning "out of range" "\x out of range" } */
#endif
#if '\x0ff' /* { dg-bogus "out of range" "\x out of range" } */
#endif
#if '\400' /* { dg-warning "out of range" "\x out of range" } */
#endif
#if '\377' /* { dg-bogus "out of range" "bogus \x out of range" } */
#endif
#if '\177' != 0x7f /* { dg-bogus "out of range" "bogus \x out of range" } */
#error bad octal /* { dg-bogus "bad" "bad octal evaluation" } */
#endif
#if '\0377' /* { dg-warning "multi" "too long octal" } */
#endif
#if '\p' /* { dg-error "unknown escape" "unknown escape seq" } */
#endif
/* Exercise escape-sequence diagnostics in ordinary C code, mirroring
   the #if directives in this file.  Lines marked dg-error or
   dg-warning must produce the named diagnostic; lines marked dg-bogus
   must NOT produce it (here: the hex escape 0ff and the octal escape
   377 must not be reported as out of range).  */
void foo ()
{
int c;
c = '\x'; /* { dg-error "no following" "\x with no digits" } */
c = '\x100'; /* { dg-warning "out of range" "\x out of range" } */
c = '\x0ff'; /* { dg-bogus "out of range" "\x out of range" } */
c = '\400'; /* { dg-warning "out of range" "\x out of range" } */
c = '\377'; /* { dg-bogus "out of range" "bogus \x out of range" } */
/* An octal escape takes at most three digits, so the trailing 7 is
   presumably read as a second character, hence the "multi" warning.  */
c = '\0377'; /* { dg-warning "multi" "too long octal" } */
c = '\p'; /* { dg-error "unknown escape" "unknown escape seq" } */
}

View File

@ -0,0 +1,20 @@
/* Copyright (C) 2001 Free Software Foundation, Inc. */
/* { dg-do compile } */
/* { dg-options "-pedantic -std=c99 -fno-show-column" } */
/* This tests various diagnostics with -pedantic about escape
sequences, for both the preprocessor and the compiler.
Neil Booth, 22 May 2001. */
#if '\e' /* { dg-warning "non-ISO" "non-ISO \\e" } */
#endif
#if '\u00a0' /* { dg-bogus "unknown" "\\u is known in C99" } */
#endif
/* Compiler-side counterpart of the #if checks in this file, built with
   the -pedantic -std=c99 options given in dg-options: the E escape
   must draw the "non-ISO" warning, while the u00a0 universal character
   name must not be reported as an unknown escape.  */
void foo ()
{
int c = '\E'; /* { dg-warning "non-ISO" "non-ISO \\E" } */
c = '\u00a0'; /* { dg-bogus "unknown" "\\u is known in C99" } */
}

View File

@ -0,0 +1,25 @@
/* Copyright (C) 2001 Free Software Foundation, Inc. */
/* { dg-do compile } */
/* { dg-options "-Wtraditional -std=c89 -fno-show-column" } */
/* This tests various diagnostics with -Wtraditional about escape
sequences, for both the preprocessor and the compiler.
Neil Booth, 22 May 2001. */
#if '\a' /* { dg-warning "traditional" "traditional bell" } */
#endif
#if '\x1a' != 26 /* { dg-warning "traditional" "traditional hex" } */
#error bad hex /* { dg-bogus "bad" "bad hexadecimal evaluation" } */
#endif
#if '\u' /* { dg-warning "unknown" "\u is unknown in C89" } */
#endif
/* Compiler-side counterpart of the #if checks in this file, built with
   the -Wtraditional -std=c89 options given in dg-options: the bell and
   hex escapes must draw a "traditional" warning, and the u escape must
   be reported as unknown.  */
void foo ()
{
int c = '\a'; /* { dg-warning "traditional" "traditional bell" } */
c = '\xa1'; /* { dg-warning "traditional" "traditional hex" } */
c = '\u'; /* { dg-warning "unknown" "\u is unknown in C89" } */
}

View File

@ -0,0 +1,35 @@
/* Copyright (C) 2001 Free Software Foundation, Inc. */
/* { dg-do compile } */
/* { dg-options "-std=c99" } */
/* This tests universal character sequences.
Neil Booth, 22 May 2001. */
#if L'\u1234' != 0x1234
#error bad short ucs /* { dg-bogus "bad" "bad \u1234 evaluation" } */
#endif
#if L'\U1234abcd' != 0x1234abcd
#error bad long ucs /* { dg-bogus "bad" "bad \U1234abcd evaluation" } */
#endif
void foo () /* UCN diagnostics in ordinary C code (outside #if). */
{
int c;
c = L'\ubad'; /* { dg-error "incomplete" "incompete UCN 1" } */
c = L"\U1234"[0]; /* { dg-error "incomplete" "incompete UCN 2" } */
c = L'\u000x'; /* { dg-error "non-hex" "non-hex digit in UCN" } */
/* { dg-warning "too long" "" { target *-*-* } 24 } */
/* NOTE: the directive above names an absolute line number (24),
   presumably the line holding the non-hex test just before it.  Adding
   or removing lines ahead of that point requires updating the number.
   The extra "too long" warning is presumably caused by the non-hex
   character being left in the constant after the error -- TODO confirm.

   The remaining cases check range validation: 0024, 0040 and 0060
   (dollar, at-sign, backquote) and the extended character 00a0 must be
   accepted, whereas 0025 and the D800-DFFF range must be rejected with
   an "out of range" error.  */
c = '\u0024'; /* { dg-bogus "invalid" "0024 is a valid UCN" } */
c = "\u0040"[0]; /* { dg-bogus "invalid" "0040 is a valid UCN" } */
c = '\u00a0'; /* { dg-bogus "invalid" "00a0 is a valid UCN" } */
c = '\U00000060'; /* { dg-bogus "invalid" "0060 is a valid UCN" } */
c = '\u0025'; /* { dg-error "range" "0025 is an invalid UCN" } */
c = L"\uD800"[0]; /* { dg-error "range" "D800 is an invalid UCN" } */
c = L'\U0000DFFF'; /* { dg-error "range" "DFFF is an invalid UCN" } */
}