mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-01-11 07:04:33 +08:00
preprocessor: C2x identifier rules
C2x has, like C++, adopted rules for identifiers based directly on an unversioned normative reference to Unicode. Make libcpp follow those rules for c2x / gnu2x standards (this involves bringing back a flag separate from the C++ one for whether to use these identifier rules, but this time enabled for all C++ language versions since that was the conclusion adopted for C++ identifier handling).

There is one change here that affects C++. I believe the new normative requirement for NFC only applies to identifiers, not to the use of identifier-continue characters in pp-numbers, where there is no such requirement and so the diagnostic ought to be a warning not a pedwarn in pp-numbers, and that this is the case for both C and C++.

Bootstrapped with no regressions for x86_64-pc-linux-gnu.

libcpp/
	* charset.cc (ucn_valid_in_identifier): Check xid_identifiers not cplusplus to determine whether to use CXX23 and NXX23 flags.
	* include/cpplib.h (struct cpp_options): Add xid_identifiers.
	* init.cc (struct lang_flags, lang_defaults): Add xid_identifiers.
	(cpp_set_lang): Set xid_identifiers.
	* lex.cc (warn_about_normalization): Add parameter identifier. Only pedwarn about non-NFC for identifiers, not pp-numbers.
	(_cpp_lex_direct): Update calls to warn_about_normalization.

gcc/testsuite/
	* gcc.dg/cpp/c2x-ucnid-1-utf8.c, gcc.dg/cpp/c2x-ucnid-1.c: New tests.
This commit is contained in:
parent
89f20c9ae4
commit
36d20fa4a8
13
gcc/testsuite/gcc.dg/cpp/c2x-ucnid-1-utf8.c
Normal file
13
gcc/testsuite/gcc.dg/cpp/c2x-ucnid-1-utf8.c
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
/* Test C2x (= Unicode) rules for characters in identifiers. */
|
||||||
|
/* { dg-do preprocess } */
|
||||||
|
/* { dg-options "-std=c2x -pedantic-errors" } */
|
||||||
|
|
||||||
|
¨
|
||||||
|
|
||||||
|
/* The requirement for NFC only applies in identifiers, not pp-numbers. */
|
||||||
|
|
||||||
|
À /* { dg-error "not in NFC" } */
|
||||||
|
ÿÀ /* { dg-error "not in NFC" } */
|
||||||
|
|
||||||
|
0À /* { dg-warning "not in NFC" } */
|
||||||
|
.1À /* { dg-warning "not in NFC" } */
|
13
gcc/testsuite/gcc.dg/cpp/c2x-ucnid-1.c
Normal file
13
gcc/testsuite/gcc.dg/cpp/c2x-ucnid-1.c
Normal file
@ -0,0 +1,13 @@
|
|||||||
|
/* Test C2x (= Unicode) rules for characters in identifiers. */
|
||||||
|
/* { dg-do preprocess } */
|
||||||
|
/* { dg-options "-std=c2x -pedantic-errors" } */
|
||||||
|
|
||||||
|
\u00A8 /* { dg-error "is not valid in an identifier" } */
|
||||||
|
|
||||||
|
/* The requirement for NFC only applies in identifiers, not pp-numbers. */
|
||||||
|
|
||||||
|
A\u0300 /* { dg-error "not in NFC" } */
|
||||||
|
\u00ffA\u0300 /* { dg-error "not in NFC" } */
|
||||||
|
|
||||||
|
0A\u0300 /* { dg-warning "not in NFC" } */
|
||||||
|
.1A\u0300 /* { dg-warning "not in NFC" } */
|
@ -1291,7 +1291,7 @@ ucn_valid_in_identifier (cpp_reader *pfile, cppchar_t c,
|
|||||||
valid_flags = C99 | CXX | C11 | CXX23;
|
valid_flags = C99 | CXX | C11 | CXX23;
|
||||||
if (CPP_PEDANTIC (pfile))
|
if (CPP_PEDANTIC (pfile))
|
||||||
{
|
{
|
||||||
if (CPP_OPTION (pfile, cplusplus))
|
if (CPP_OPTION (pfile, xid_identifiers))
|
||||||
valid_flags = CXX23;
|
valid_flags = CXX23;
|
||||||
else if (CPP_OPTION (pfile, c11_identifiers))
|
else if (CPP_OPTION (pfile, c11_identifiers))
|
||||||
valid_flags = C11;
|
valid_flags = C11;
|
||||||
@ -1355,7 +1355,7 @@ ucn_valid_in_identifier (cpp_reader *pfile, cppchar_t c,
|
|||||||
return 2;
|
return 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (CPP_OPTION (pfile, cplusplus))
|
if (CPP_OPTION (pfile, xid_identifiers))
|
||||||
invalid_start_flags = NXX23;
|
invalid_start_flags = NXX23;
|
||||||
else if (CPP_OPTION (pfile, c11_identifiers))
|
else if (CPP_OPTION (pfile, c11_identifiers))
|
||||||
invalid_start_flags = N11;
|
invalid_start_flags = N11;
|
||||||
|
@ -496,6 +496,10 @@ struct cpp_options
|
|||||||
in C11. */
|
in C11. */
|
||||||
unsigned char c11_identifiers;
|
unsigned char c11_identifiers;
|
||||||
|
|
||||||
|
/* Nonzero means extended identifiers allow the characters specified
|
||||||
|
by Unicode XID_Start and XID_Continue properties. */
|
||||||
|
unsigned char xid_identifiers;
|
||||||
|
|
||||||
/* Nonzero for C++ 2014 Standard binary constants. */
|
/* Nonzero for C++ 2014 Standard binary constants. */
|
||||||
unsigned char binary_constants;
|
unsigned char binary_constants;
|
||||||
|
|
||||||
|
@ -82,6 +82,7 @@ struct lang_flags
|
|||||||
char extended_numbers;
|
char extended_numbers;
|
||||||
char extended_identifiers;
|
char extended_identifiers;
|
||||||
char c11_identifiers;
|
char c11_identifiers;
|
||||||
|
char xid_identifiers;
|
||||||
char std;
|
char std;
|
||||||
char digraphs;
|
char digraphs;
|
||||||
char uliterals;
|
char uliterals;
|
||||||
@ -102,31 +103,31 @@ struct lang_flags
|
|||||||
};
|
};
|
||||||
|
|
||||||
static const struct lang_flags lang_defaults[] =
|
static const struct lang_flags lang_defaults[] =
|
||||||
{ /* c99 c++ xnum xid c11 std digr ulit rlit udlit bincst digsep trig u8chlit vaopt scope dfp szlit elifdef warndir delim trufal */
|
{ /* c99 c++ xnum xid c11 xidid std digr ulit rlit udlit bincst digsep trig u8chlit vaopt scope dfp szlit elifdef warndir delim trufal */
|
||||||
/* GNUC89 */ { 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0 },
|
/* GNUC89 */ { 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0 },
|
||||||
/* GNUC99 */ { 1, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0 },
|
/* GNUC99 */ { 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0 },
|
||||||
/* GNUC11 */ { 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0 },
|
/* GNUC11 */ { 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0 },
|
||||||
/* GNUC17 */ { 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0 },
|
/* GNUC17 */ { 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0 },
|
||||||
/* GNUC2X */ { 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1 },
|
/* GNUC2X */ { 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1 },
|
||||||
/* STDC89 */ { 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
|
/* STDC89 */ { 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
|
||||||
/* STDC94 */ { 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
|
/* STDC94 */ { 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
|
||||||
/* STDC99 */ { 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
|
/* STDC99 */ { 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
|
||||||
/* STDC11 */ { 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
|
/* STDC11 */ { 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
|
||||||
/* STDC17 */ { 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
|
/* STDC17 */ { 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
|
||||||
/* STDC2X */ { 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1 },
|
/* STDC2X */ { 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1 },
|
||||||
/* GNUCXX */ { 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1 },
|
/* GNUCXX */ { 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1 },
|
||||||
/* CXX98 */ { 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1 },
|
/* CXX98 */ { 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1 },
|
||||||
/* GNUCXX11 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1 },
|
/* GNUCXX11 */ { 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1 },
|
||||||
/* CXX11 */ { 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1 },
|
/* CXX11 */ { 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1 },
|
||||||
/* GNUCXX14 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1 },
|
/* GNUCXX14 */ { 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1 },
|
||||||
/* CXX14 */ { 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1 },
|
/* CXX14 */ { 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1 },
|
||||||
/* GNUCXX17 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1 },
|
/* GNUCXX17 */ { 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1 },
|
||||||
/* CXX17 */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1 },
|
/* CXX17 */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1 },
|
||||||
/* GNUCXX20 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1 },
|
/* GNUCXX20 */ { 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1 },
|
||||||
/* CXX20 */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1 },
|
/* CXX20 */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1 },
|
||||||
/* GNUCXX23 */ { 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1 },
|
/* GNUCXX23 */ { 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1 },
|
||||||
/* CXX23 */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1 },
|
/* CXX23 */ { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1 },
|
||||||
/* ASM */ { 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
|
/* ASM */ { 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Sets internal flags correctly for a given language. */
|
/* Sets internal flags correctly for a given language. */
|
||||||
@ -142,6 +143,7 @@ cpp_set_lang (cpp_reader *pfile, enum c_lang lang)
|
|||||||
CPP_OPTION (pfile, extended_numbers) = l->extended_numbers;
|
CPP_OPTION (pfile, extended_numbers) = l->extended_numbers;
|
||||||
CPP_OPTION (pfile, extended_identifiers) = l->extended_identifiers;
|
CPP_OPTION (pfile, extended_identifiers) = l->extended_identifiers;
|
||||||
CPP_OPTION (pfile, c11_identifiers) = l->c11_identifiers;
|
CPP_OPTION (pfile, c11_identifiers) = l->c11_identifiers;
|
||||||
|
CPP_OPTION (pfile, xid_identifiers) = l->xid_identifiers;
|
||||||
CPP_OPTION (pfile, std) = l->std;
|
CPP_OPTION (pfile, std) = l->std;
|
||||||
CPP_OPTION (pfile, digraphs) = l->digraphs;
|
CPP_OPTION (pfile, digraphs) = l->digraphs;
|
||||||
CPP_OPTION (pfile, uliterals) = l->uliterals;
|
CPP_OPTION (pfile, uliterals) = l->uliterals;
|
||||||
|
@ -2007,7 +2007,8 @@ name_p (cpp_reader *pfile, const cpp_string *string)
|
|||||||
static void
|
static void
|
||||||
warn_about_normalization (cpp_reader *pfile,
|
warn_about_normalization (cpp_reader *pfile,
|
||||||
const cpp_token *token,
|
const cpp_token *token,
|
||||||
const struct normalize_state *s)
|
const struct normalize_state *s,
|
||||||
|
bool identifier)
|
||||||
{
|
{
|
||||||
if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
|
if (CPP_OPTION (pfile, warn_normalize) < NORMALIZE_STATE_RESULT (s)
|
||||||
&& !pfile->state.skipping)
|
&& !pfile->state.skipping)
|
||||||
@ -2043,7 +2044,7 @@ warn_about_normalization (cpp_reader *pfile,
|
|||||||
if (NORMALIZE_STATE_RESULT (s) == normalized_C)
|
if (NORMALIZE_STATE_RESULT (s) == normalized_C)
|
||||||
cpp_warning_at (pfile, CPP_W_NORMALIZE, &rich_loc,
|
cpp_warning_at (pfile, CPP_W_NORMALIZE, &rich_loc,
|
||||||
"`%.*s' is not in NFKC", (int) sz, buf);
|
"`%.*s' is not in NFKC", (int) sz, buf);
|
||||||
else if (CPP_OPTION (pfile, cplusplus))
|
else if (identifier && CPP_OPTION (pfile, xid_identifiers))
|
||||||
cpp_pedwarning_at (pfile, CPP_W_NORMALIZE, &rich_loc,
|
cpp_pedwarning_at (pfile, CPP_W_NORMALIZE, &rich_loc,
|
||||||
"`%.*s' is not in NFC", (int) sz, buf);
|
"`%.*s' is not in NFC", (int) sz, buf);
|
||||||
else
|
else
|
||||||
@ -3839,7 +3840,7 @@ _cpp_lex_direct (cpp_reader *pfile)
|
|||||||
struct normalize_state nst = INITIAL_NORMALIZE_STATE;
|
struct normalize_state nst = INITIAL_NORMALIZE_STATE;
|
||||||
result->type = CPP_NUMBER;
|
result->type = CPP_NUMBER;
|
||||||
lex_number (pfile, &result->val.str, &nst);
|
lex_number (pfile, &result->val.str, &nst);
|
||||||
warn_about_normalization (pfile, result, &nst);
|
warn_about_normalization (pfile, result, &nst, false);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -3888,7 +3889,7 @@ _cpp_lex_direct (cpp_reader *pfile)
|
|||||||
result->val.node.node = lex_identifier (pfile, buffer->cur - 1, false,
|
result->val.node.node = lex_identifier (pfile, buffer->cur - 1, false,
|
||||||
&nst,
|
&nst,
|
||||||
&result->val.node.spelling);
|
&result->val.node.spelling);
|
||||||
warn_about_normalization (pfile, result, &nst);
|
warn_about_normalization (pfile, result, &nst, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Convert named operators to their proper types. */
|
/* Convert named operators to their proper types. */
|
||||||
@ -4101,7 +4102,7 @@ _cpp_lex_direct (cpp_reader *pfile)
|
|||||||
struct normalize_state nst = INITIAL_NORMALIZE_STATE;
|
struct normalize_state nst = INITIAL_NORMALIZE_STATE;
|
||||||
result->type = CPP_NUMBER;
|
result->type = CPP_NUMBER;
|
||||||
lex_number (pfile, &result->val.str, &nst);
|
lex_number (pfile, &result->val.str, &nst);
|
||||||
warn_about_normalization (pfile, result, &nst);
|
warn_about_normalization (pfile, result, &nst, false);
|
||||||
}
|
}
|
||||||
else if (*buffer->cur == '.' && buffer->cur[1] == '.')
|
else if (*buffer->cur == '.' && buffer->cur[1] == '.')
|
||||||
buffer->cur += 2, result->type = CPP_ELLIPSIS;
|
buffer->cur += 2, result->type = CPP_ELLIPSIS;
|
||||||
@ -4192,7 +4193,7 @@ _cpp_lex_direct (cpp_reader *pfile)
|
|||||||
result->type = CPP_NAME;
|
result->type = CPP_NAME;
|
||||||
result->val.node.node = lex_identifier (pfile, base, true, &nst,
|
result->val.node.node = lex_identifier (pfile, base, true, &nst,
|
||||||
&result->val.node.spelling);
|
&result->val.node.spelling);
|
||||||
warn_about_normalization (pfile, result, &nst);
|
warn_about_normalization (pfile, result, &nst, true);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user