diff --git a/gcc/c-family/ChangeLog b/gcc/c-family/ChangeLog index 336159d79f64..f4fdccc448af 100644 --- a/gcc/c-family/ChangeLog +++ b/gcc/c-family/ChangeLog @@ -1,3 +1,8 @@ +2019-11-14 Joseph Myers + + * c-lex.c (lex_charconst): Make CPP_UTF8CHAR constants unsigned + char for C. + 2019-11-14 Jakub Jelinek * c-omp.c (c_omp_check_context_selector): Add nvidia to the list of diff --git a/gcc/c-family/c-lex.c b/gcc/c-family/c-lex.c index 42010a762a62..d446633f8145 100644 --- a/gcc/c-family/c-lex.c +++ b/gcc/c-family/c-lex.c @@ -1376,7 +1376,9 @@ lex_charconst (const cpp_token *token) type = char16_type_node; else if (token->type == CPP_UTF8CHAR) { - if (flag_char8_t) + if (!c_dialect_cxx ()) + type = unsigned_char_type_node; + else if (flag_char8_t) type = char8_type_node; else type = char_type_node; diff --git a/gcc/c/ChangeLog b/gcc/c/ChangeLog index 04dce4b45cea..b881cab75ded 100644 --- a/gcc/c/ChangeLog +++ b/gcc/c/ChangeLog @@ -1,3 +1,9 @@ +2019-11-14 Joseph Myers + + * c-parser.c (c_parser_postfix_expression) + (c_parser_check_literal_zero): Handle CPP_UTF8CHAR. + * gimple-parser.c (c_parser_gimple_postfix_expression): Likewise. + 2019-11-14 Richard Sandiford * c-typeck.c (build_conditional_expr): Use truth_type_for instead diff --git a/gcc/c/c-parser.c b/gcc/c/c-parser.c index 5e30a7f19168..8ce4e70a0fc0 100644 --- a/gcc/c/c-parser.c +++ b/gcc/c/c-parser.c @@ -8783,6 +8783,7 @@ c_parser_postfix_expression (c_parser *parser) case CPP_CHAR: case CPP_CHAR16: case CPP_CHAR32: + case CPP_UTF8CHAR: case CPP_WCHAR: expr.value = c_parser_peek_token (parser)->value; /* For the purpose of warning when a pointer is compared with @@ -10459,6 +10460,7 @@ c_parser_check_literal_zero (c_parser *parser, unsigned *literal_zero_mask, case CPP_WCHAR: case CPP_CHAR16: case CPP_CHAR32: + case CPP_UTF8CHAR: /* If a parameter is literal zero alone, remember it for -Wmemset-transposed-args warning. */ if (integer_zerop (tok->value) diff --git a/gcc/c/gimple-parser.c b/gcc/c/gimple-parser.c index 6fdb83c1abe1..c16d0dfb88ea 100644 --- a/gcc/c/gimple-parser.c +++ b/gcc/c/gimple-parser.c @@ -1395,6 +1395,7 @@ c_parser_gimple_postfix_expression (gimple_parser &parser) case CPP_CHAR: case CPP_CHAR16: case CPP_CHAR32: + case CPP_UTF8CHAR: case CPP_WCHAR: expr.value = c_parser_peek_token (parser)->value; set_c_expr_source_range (&expr, tok_range); diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 668444818f2b..51624b7212e4 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,9 @@ +2019-11-14 Joseph Myers + + * gcc.dg/c11-utf8char-1.c, gcc.dg/c2x-utf8char-1.c, + gcc.dg/c2x-utf8char-2.c, gcc.dg/c2x-utf8char-3.c, + gcc.dg/gnu2x-utf8char-1.c: New tests. + 2019-11-14 Richard Sandiford PR testsuite/92366 diff --git a/gcc/testsuite/gcc.dg/c11-utf8char-1.c b/gcc/testsuite/gcc.dg/c11-utf8char-1.c new file mode 100644 index 000000000000..26dbd92b3ade --- /dev/null +++ b/gcc/testsuite/gcc.dg/c11-utf8char-1.c @@ -0,0 +1,7 @@ +/* Test C2x UTF-8 characters. Test not accepted for C11. */ +/* { dg-do compile } */ +/* { dg-options "-std=c11 -pedantic-errors" } */ + +#define z(x) 0 +#define u8 z( +unsigned char a = u8'a'); diff --git a/gcc/testsuite/gcc.dg/c2x-utf8char-1.c b/gcc/testsuite/gcc.dg/c2x-utf8char-1.c new file mode 100644 index 000000000000..76543afca82b --- /dev/null +++ b/gcc/testsuite/gcc.dg/c2x-utf8char-1.c @@ -0,0 +1,29 @@ +/* Test C2x UTF-8 characters. Test valid usages. */ +/* { dg-do compile } */ +/* { dg-options "-std=c2x -pedantic-errors" } */ + +unsigned char a = u8'a'; +_Static_assert (u8'a' == 97); + +unsigned char b = u8'\0'; +_Static_assert (u8'\0' == 0); + +unsigned char c = u8'\xff'; +_Static_assert (u8'\xff' == 255); + +unsigned char d = u8'\377'; +_Static_assert (u8'\377' == 255); + +_Static_assert (sizeof (u8'a') == 1); +_Static_assert (sizeof (u8'\0') == 1); +_Static_assert (sizeof (u8'\xff') == 1); +_Static_assert (sizeof (u8'\377') == 1); + +_Static_assert (_Generic (u8'a', unsigned char: 1, default: 2) == 1); +_Static_assert (_Generic (u8'\0', unsigned char: 1, default: 2) == 1); +_Static_assert (_Generic (u8'\xff', unsigned char: 1, default: 2) == 1); +_Static_assert (_Generic (u8'\377', unsigned char: 1, default: 2) == 1); + +#if u8'\0' - 1 < 0 +#error "UTF-8 constants not unsigned in preprocessor" +#endif diff --git a/gcc/testsuite/gcc.dg/c2x-utf8char-2.c b/gcc/testsuite/gcc.dg/c2x-utf8char-2.c new file mode 100644 index 000000000000..4e6a2f6955f8 --- /dev/null +++ b/gcc/testsuite/gcc.dg/c2x-utf8char-2.c @@ -0,0 +1,8 @@ +/* Test C2x UTF-8 characters. Character values not affected by + different execution character set. */ +/* { dg-do compile } */ +/* { dg-require-iconv "IBM1047" } */ +/* { dg-options "-std=c2x -pedantic-errors -fexec-charset=IBM1047" } */ + +_Static_assert (u8'a' == 97); +_Static_assert (u8'a' != (unsigned char) 'a'); diff --git a/gcc/testsuite/gcc.dg/c2x-utf8char-3.c b/gcc/testsuite/gcc.dg/c2x-utf8char-3.c new file mode 100644 index 000000000000..7c4898319989 --- /dev/null +++ b/gcc/testsuite/gcc.dg/c2x-utf8char-3.c @@ -0,0 +1,8 @@ +/* Test C2x UTF-8 characters. Test errors for invalid code. */ +/* { dg-do compile } */ +/* { dg-options "-std=c2x -pedantic-errors" } */ + +unsigned char a = u8''; /* { dg-error "empty character constant" } */ +unsigned char b = u8'ab'; /* { dg-error "character constant too long for its type" } */ +unsigned char c = u8'\u00ff'; /* { dg-error "character constant too long for its type" } */ +unsigned char d = u8'\x100'; /* { dg-error "hex escape sequence out of range" } */ diff --git a/gcc/testsuite/gcc.dg/gnu2x-utf8char-1.c b/gcc/testsuite/gcc.dg/gnu2x-utf8char-1.c new file mode 100644 index 000000000000..9c3add2ae83e --- /dev/null +++ b/gcc/testsuite/gcc.dg/gnu2x-utf8char-1.c @@ -0,0 +1,5 @@ +/* Test C2x UTF-8 characters. Test accepted with -std=gnu2x. */ +/* { dg-do compile } */ +/* { dg-options "-std=gnu2x" } */ + +#include "c2x-utf8char-1.c" diff --git a/libcpp/ChangeLog b/libcpp/ChangeLog index b57f26584a15..448f954d2ee1 100644 --- a/libcpp/ChangeLog +++ b/libcpp/ChangeLog @@ -1,3 +1,10 @@ +2019-11-14 Joseph Myers + + * charset.c (narrow_str_to_charconst): Make CPP_UTF8CHAR constants + unsigned for C. + * init.c (lang_defaults): Set utf8_char_literals for GNUC2X and + STDC2X. + 2019-11-07 Jakub Jelinek PR c++/91370 - Implement P1041R4 and P1139R2 - Stronger Unicode reqs diff --git a/libcpp/charset.c b/libcpp/charset.c index 0b8815af46bc..d4574415ac16 100644 --- a/libcpp/charset.c +++ b/libcpp/charset.c @@ -1928,6 +1928,8 @@ narrow_str_to_charconst (cpp_reader *pfile, cpp_string str, /* Multichar constants are of type int and therefore signed. */ if (i > 1) unsigned_p = 0; + else if (type == CPP_UTF8CHAR && !CPP_OPTION (pfile, cplusplus)) + unsigned_p = 1; else unsigned_p = CPP_OPTION (pfile, unsigned_char); diff --git a/libcpp/init.c b/libcpp/init.c index 32b0e70a2107..f5f41b012f89 100644 --- a/libcpp/init.c +++ b/libcpp/init.c @@ -102,13 +102,13 @@ static const struct lang_flags lang_defaults[] = /* GNUC99 */ { 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0 }, /* GNUC11 */ { 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0 }, /* GNUC17 */ { 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0 }, - /* GNUC2X */ { 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1 }, + /* GNUC2X */ { 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1 }, /* STDC89 */ { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0 }, /* STDC94 */ { 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0 }, /* STDC99 */ { 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0 }, /* STDC11 */ { 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0 }, /* STDC17 */ { 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0 }, - /* STDC2X */ { 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1 }, + /* STDC2X */ { 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1 }, /* GNUCXX */ { 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0 }, /* CXX98 */ { 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0 }, /* GNUCXX11 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0 },