mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-04-06 04:00:25 +08:00
On-demand locations within string-literals
gcc/c-family/ChangeLog: * c-common.c: Include "substring-locations.h". (get_cpp_ttype_from_string_type): New function. (g_string_concat_db): New global. (substring_loc::get_range): New method. * c-common.h (g_string_concat_db): New declaration. (class substring_loc): New class. * c-lex.c (lex_string): When concatenating strings, capture the locations of all tokens using a new obstack, and record the concatenation locations within g_string_concat_db. * c-opts.c (c_common_init_options): Construct g_string_concat_db on the ggc-heap. gcc/ChangeLog: * input.c (string_concat::string_concat): New constructor. (string_concat_db::string_concat_db): New constructor. (string_concat_db::record_string_concatenation): New method. (string_concat_db::get_string_concatenation): New method. (string_concat_db::get_key_loc): New method. (class auto_cpp_string_vec): New class. (get_substring_ranges_for_loc): New function. (get_source_range_for_substring): New function. (get_num_source_ranges_for_substring): New function. (class selftest::lexer_test_options): New class. (struct selftest::lexer_test): New struct. (class selftest::ebcdic_execution_charset): New class. (selftest::ebcdic_execution_charset::s_singleton): New variable. (selftest::lexer_test::lexer_test): New constructor. (selftest::lexer_test::~lexer_test): New destructor. (selftest::lexer_test::get_token): New method. (selftest::assert_char_at_range): New function. (ASSERT_CHAR_AT_RANGE): New macro. (selftest::assert_num_substring_ranges): New function. (ASSERT_NUM_SUBSTRING_RANGES): New macro. (selftest::assert_has_no_substring_ranges): New function. (ASSERT_HAS_NO_SUBSTRING_RANGES): New macro. (selftest::test_lexer_string_locations_simple): New function. (selftest::test_lexer_string_locations_ebcdic): New function. (selftest::test_lexer_string_locations_hex): New function. (selftest::test_lexer_string_locations_oct): New function. (selftest::test_lexer_string_locations_letter_escape_1): New function. 
(selftest::test_lexer_string_locations_letter_escape_2): New function. (selftest::test_lexer_string_locations_ucn4): New function. (selftest::test_lexer_string_locations_ucn8): New function. (selftest::uint32_from_big_endian): New function. (selftest::test_lexer_string_locations_wide_string): New function. (selftest::uint16_from_big_endian): New function. (selftest::test_lexer_string_locations_string16): New function. (selftest::test_lexer_string_locations_string32): New function. (selftest::test_lexer_string_locations_u8): New function. (selftest::test_lexer_string_locations_utf8_source): New function. (selftest::test_lexer_string_locations_concatenation_1): New function. (selftest::test_lexer_string_locations_concatenation_2): New function. (selftest::test_lexer_string_locations_concatenation_3): New function. (selftest::test_lexer_string_locations_macro): New function. (selftest::test_lexer_string_locations_stringified_macro_argument): New function. (selftest::test_lexer_string_locations_non_string): New function. (selftest::test_lexer_string_locations_long_line): New function. (selftest::test_lexer_char_constants): New function. (selftest::input_c_tests): Call the new test functions once per case within the line_table test matrix. * input.h (struct string_concat): New struct. (struct location_hash): New struct. (class string_concat_db): New class. * substring-locations.h: New header. gcc/testsuite/ChangeLog: * gcc.dg/plugin/diagnostic-test-string-literals-1.c: New file. * gcc.dg/plugin/diagnostic-test-string-literals-2.c: New file. * gcc.dg/plugin/diagnostic_plugin_test_string_literals.c: New file. * gcc.dg/plugin/plugin.exp (plugin_test_list): Add the above new files. libcpp/ChangeLog: * charset.c (cpp_substring_ranges::cpp_substring_ranges): New constructor. (cpp_substring_ranges::~cpp_substring_ranges): New destructor. (cpp_substring_ranges::add_range): New method. (cpp_substring_ranges::add_n_ranges): New method. 
(_cpp_valid_ucn): Add "char_range" and "loc_reader" params; if they are non-NULL, read position information from *loc_reader and update char_range->m_finish accordingly. (convert_ucn): Add "char_range", "loc_reader", and "ranges" params. If loc_reader is non-NULL, read location information from it, and update *ranges accordingly, using char_range. Conditionalize the conversion into tbuf on tbuf being non-NULL. (convert_hex): Likewise, conditionalizing the call to emit_numeric_escape on tbuf. (convert_oct): Likewise. (convert_escape): Add params "loc_reader" and "ranges". If loc_reader is non-NULL, read location information from it, and update *ranges accordingly. Conditionalize the conversion into tbuf on tbuf being non-NULL. (cpp_interpret_string): Rename to... (cpp_interpret_string_1): ...this, adding params "loc_readers" and "out". Use "to" to conditionalize the initialization and usage of "tbuf", such as running the converter. If "loc_readers" is non-NULL, use the instances within it, reading location information from them, and passing them to convert_escape; likewise write to "out" if loc_readers is non-NULL. Check for leading quote and issue an error if it is not present. Update boundary check from "== limit" to ">= limit" to protect against erroneous location values to calls that are not parsing string literals. (cpp_interpret_string): Reimplement in terms to cpp_interpret_string_1. (noop_error_cb): New function. (cpp_interpret_string_ranges): New function. (cpp_string_location_reader::cpp_string_location_reader): New constructor. (cpp_string_location_reader::get_next): New method. * include/cpplib.h (class cpp_string_location_reader): New class. (class cpp_substring_ranges): New class. (cpp_interpret_string_ranges): New prototype. * internal.h (_cpp_valid_ucn): Add params "char_range" and "loc_reader". * lex.c (forms_identifier_p): Pass NULL for new params to _cpp_valid_ucn. From-SVN: r239175
This commit is contained in:
parent
1addb9e62b
commit
88fa5555a3
@ -1,3 +1,61 @@
|
||||
2016-08-05 David Malcolm <dmalcolm@redhat.com>
|
||||
|
||||
* input.c (string_concat::string_concat): New constructor.
|
||||
(string_concat_db::string_concat_db): New constructor.
|
||||
(string_concat_db::record_string_concatenation): New method.
|
||||
(string_concat_db::get_string_concatenation): New method.
|
||||
(string_concat_db::get_key_loc): New method.
|
||||
(class auto_cpp_string_vec): New class.
|
||||
(get_substring_ranges_for_loc): New function.
|
||||
(get_source_range_for_substring): New function.
|
||||
(get_num_source_ranges_for_substring): New function.
|
||||
(class selftest::lexer_test_options): New class.
|
||||
(struct selftest::lexer_test): New struct.
|
||||
(class selftest::ebcdic_execution_charset): New class.
|
||||
(selftest::ebcdic_execution_charset::s_singleton): New variable.
|
||||
(selftest::lexer_test::lexer_test): New constructor.
|
||||
(selftest::lexer_test::~lexer_test): New destructor.
|
||||
(selftest::lexer_test::get_token): New method.
|
||||
(selftest::assert_char_at_range): New function.
|
||||
(ASSERT_CHAR_AT_RANGE): New macro.
|
||||
(selftest::assert_num_substring_ranges): New function.
|
||||
(ASSERT_NUM_SUBSTRING_RANGES): New macro.
|
||||
(selftest::assert_has_no_substring_ranges): New function.
|
||||
(ASSERT_HAS_NO_SUBSTRING_RANGES): New macro.
|
||||
(selftest::test_lexer_string_locations_simple): New function.
|
||||
(selftest::test_lexer_string_locations_ebcdic): New function.
|
||||
(selftest::test_lexer_string_locations_hex): New function.
|
||||
(selftest::test_lexer_string_locations_oct): New function.
|
||||
(selftest::test_lexer_string_locations_letter_escape_1): New function.
|
||||
(selftest::test_lexer_string_locations_letter_escape_2): New function.
|
||||
(selftest::test_lexer_string_locations_ucn4): New function.
|
||||
(selftest::test_lexer_string_locations_ucn8): New function.
|
||||
(selftest::uint32_from_big_endian): New function.
|
||||
(selftest::test_lexer_string_locations_wide_string): New function.
|
||||
(selftest::uint16_from_big_endian): New function.
|
||||
(selftest::test_lexer_string_locations_string16): New function.
|
||||
(selftest::test_lexer_string_locations_string32): New function.
|
||||
(selftest::test_lexer_string_locations_u8): New function.
|
||||
(selftest::test_lexer_string_locations_utf8_source): New function.
|
||||
(selftest::test_lexer_string_locations_concatenation_1): New
|
||||
function.
|
||||
(selftest::test_lexer_string_locations_concatenation_2): New
|
||||
function.
|
||||
(selftest::test_lexer_string_locations_concatenation_3): New
|
||||
function.
|
||||
(selftest::test_lexer_string_locations_macro): New function.
|
||||
(selftest::test_lexer_string_locations_stringified_macro_argument):
|
||||
New function.
|
||||
(selftest::test_lexer_string_locations_non_string): New function.
|
||||
(selftest::test_lexer_string_locations_long_line): New function.
|
||||
(selftest::test_lexer_char_constants): New function.
|
||||
(selftest::input_c_tests): Call the new test functions once per
|
||||
case within the line_table test matrix.
|
||||
* input.h (struct string_concat): New struct.
|
||||
(struct location_hash): New struct.
|
||||
(class string_concat_db): New class.
|
||||
* substring-locations.h: New header.
|
||||
|
||||
2016-08-05 Patrick Palka <ppalka@gcc.gnu.org>
|
||||
|
||||
PR tree-optimization/72810
|
||||
|
@ -1,3 +1,17 @@
|
||||
2016-08-05 David Malcolm <dmalcolm@redhat.com>
|
||||
|
||||
* c-common.c: Include "substring-locations.h".
|
||||
(get_cpp_ttype_from_string_type): New function.
|
||||
(g_string_concat_db): New global.
|
||||
(substring_loc::get_range): New method.
|
||||
* c-common.h (g_string_concat_db): New declaration.
|
||||
(class substring_loc): New class.
|
||||
* c-lex.c (lex_string): When concatenating strings, capture the
|
||||
locations of all tokens using a new obstack, and record the
|
||||
concatenation locations within g_string_concat_db.
|
||||
* c-opts.c (c_common_init_options): Construct g_string_concat_db
|
||||
on the ggc-heap.
|
||||
|
||||
2016-07-29 Marek Polacek <polacek@redhat.com>
|
||||
|
||||
PR c/71926
|
||||
|
@ -45,6 +45,7 @@ along with GCC; see the file COPYING3. If not see
|
||||
#include "tree-iterator.h"
|
||||
#include "opts.h"
|
||||
#include "gimplify.h"
|
||||
#include "substring-locations.h"
|
||||
|
||||
cpp_reader *parse_in; /* Declared in c-pragma.h. */
|
||||
|
||||
@ -1098,6 +1099,67 @@ fix_string_type (tree value)
|
||||
TREE_STATIC (value) = 1;
|
||||
return value;
|
||||
}
|
||||
|
||||
/* Given a string of type STRING_TYPE, determine what kind of string
|
||||
token would give an equivalent execution encoding: CPP_STRING,
|
||||
CPP_STRING16, or CPP_STRING32. Return CPP_OTHER in case of error.
|
||||
This may not be exactly the string token type that initially created
|
||||
the string, since CPP_WSTRING is indistinguishable from the 16/32 bit
|
||||
string type at this point.
|
||||
|
||||
This effectively reverses part of the logic in lex_string and
|
||||
fix_string_type. */
|
||||
|
||||
static enum cpp_ttype
|
||||
get_cpp_ttype_from_string_type (tree string_type)
|
||||
{
|
||||
gcc_assert (string_type);
|
||||
if (TREE_CODE (string_type) != ARRAY_TYPE)
|
||||
return CPP_OTHER;
|
||||
|
||||
tree element_type = TREE_TYPE (string_type);
|
||||
if (TREE_CODE (element_type) != INTEGER_TYPE)
|
||||
return CPP_OTHER;
|
||||
|
||||
int bits_per_character = TYPE_PRECISION (element_type);
|
||||
switch (bits_per_character)
|
||||
{
|
||||
case 8:
|
||||
return CPP_STRING; /* It could have also been CPP_UTF8STRING. */
|
||||
case 16:
|
||||
return CPP_STRING16;
|
||||
case 32:
|
||||
return CPP_STRING32;
|
||||
}
|
||||
|
||||
return CPP_OTHER;
|
||||
}
|
||||
|
||||
/* The global record of string concatenations, for use in
|
||||
extracting locations within string literals. */
|
||||
|
||||
GTY(()) string_concat_db *g_string_concat_db;
|
||||
|
||||
/* Attempt to determine the source range of the substring.
|
||||
If successful, return NULL and write the source range to *OUT_RANGE.
|
||||
Otherwise return an error message. Error messages are intended
|
||||
for GCC developers (to help debugging) rather than for end-users. */
|
||||
|
||||
const char *
|
||||
substring_loc::get_range (source_range *out_range) const
|
||||
{
|
||||
gcc_assert (out_range);
|
||||
|
||||
enum cpp_ttype tok_type = get_cpp_ttype_from_string_type (m_string_type);
|
||||
if (tok_type == CPP_OTHER)
|
||||
return "unrecognized string type";
|
||||
|
||||
return get_source_range_for_substring (parse_in, g_string_concat_db,
|
||||
m_fmt_string_loc, tok_type,
|
||||
m_start_idx, m_end_idx,
|
||||
out_range);
|
||||
}
|
||||
|
||||
|
||||
/* Fold X for consideration by one of the warning functions when checking
|
||||
whether an expression has a constant value. */
|
||||
|
@ -1110,6 +1110,35 @@ extern time_t cb_get_source_date_epoch (cpp_reader *pfile);
|
||||
__TIME__ can store. */
|
||||
#define MAX_SOURCE_DATE_EPOCH HOST_WIDE_INT_C (253402300799)
|
||||
|
||||
extern GTY(()) string_concat_db *g_string_concat_db;
|
||||
|
||||
/* libcpp can calculate location information about a range of characters
|
||||
within a string literal, but doing so is non-trivial.
|
||||
|
||||
This class encapsulates such a source location, so that it can be
|
||||
passed around (e.g. within c-format.c). It is effectively a deferred
|
||||
call into libcpp. If needed by a diagnostic, the actual source_range
|
||||
can be calculated by calling the get_range method. */
|
||||
|
||||
class substring_loc
|
||||
{
|
||||
public:
|
||||
substring_loc (location_t fmt_string_loc, tree string_type,
|
||||
int start_idx, int end_idx)
|
||||
: m_fmt_string_loc (fmt_string_loc), m_string_type (string_type),
|
||||
m_start_idx (start_idx), m_end_idx (end_idx) {}
|
||||
|
||||
const char *get_range (source_range *out_range) const;
|
||||
|
||||
location_t get_fmt_string_loc () const { return m_fmt_string_loc; }
|
||||
|
||||
private:
|
||||
location_t m_fmt_string_loc;
|
||||
tree m_string_type;
|
||||
int m_start_idx;
|
||||
int m_end_idx;
|
||||
};
|
||||
|
||||
/* In c-gimplify.c */
|
||||
extern void c_genericize (tree);
|
||||
extern int c_gimplify_expr (tree *, gimple_seq *, gimple_seq *);
|
||||
|
@ -1097,13 +1097,16 @@ lex_string (const cpp_token *tok, tree *valp, bool objc_string, bool translate)
|
||||
tree value;
|
||||
size_t concats = 0;
|
||||
struct obstack str_ob;
|
||||
struct obstack loc_ob;
|
||||
cpp_string istr;
|
||||
enum cpp_ttype type = tok->type;
|
||||
|
||||
/* Try to avoid the overhead of creating and destroying an obstack
|
||||
for the common case of just one string. */
|
||||
cpp_string str = tok->val.str;
|
||||
location_t init_loc = tok->src_loc;
|
||||
cpp_string *strs = &str;
|
||||
location_t *locs = NULL;
|
||||
|
||||
/* objc_at_sign_was_seen is only used when doing Objective-C string
|
||||
concatenation. It is 'true' if we have seen an '@' before the
|
||||
@ -1142,16 +1145,21 @@ lex_string (const cpp_token *tok, tree *valp, bool objc_string, bool translate)
|
||||
else
|
||||
error ("unsupported non-standard concatenation of string literals");
|
||||
}
|
||||
/* FALLTHROUGH */
|
||||
|
||||
case CPP_STRING:
|
||||
if (!concats)
|
||||
{
|
||||
gcc_obstack_init (&str_ob);
|
||||
gcc_obstack_init (&loc_ob);
|
||||
obstack_grow (&str_ob, &str, sizeof (cpp_string));
|
||||
obstack_grow (&loc_ob, &init_loc, sizeof (location_t));
|
||||
}
|
||||
|
||||
concats++;
|
||||
obstack_grow (&str_ob, &tok->val.str, sizeof (cpp_string));
|
||||
obstack_grow (&loc_ob, &tok->src_loc, sizeof (location_t));
|
||||
|
||||
if (objc_string)
|
||||
objc_at_sign_was_seen = false;
|
||||
goto retry;
|
||||
@ -1164,7 +1172,10 @@ lex_string (const cpp_token *tok, tree *valp, bool objc_string, bool translate)
|
||||
/* We have read one more token than we want. */
|
||||
_cpp_backup_tokens (parse_in, 1);
|
||||
if (concats)
|
||||
strs = XOBFINISH (&str_ob, cpp_string *);
|
||||
{
|
||||
strs = XOBFINISH (&str_ob, cpp_string *);
|
||||
locs = XOBFINISH (&loc_ob, location_t *);
|
||||
}
|
||||
|
||||
if (concats && !objc_string && !in_system_header_at (input_location))
|
||||
warning (OPT_Wtraditional,
|
||||
@ -1176,6 +1187,12 @@ lex_string (const cpp_token *tok, tree *valp, bool objc_string, bool translate)
|
||||
{
|
||||
value = build_string (istr.len, (const char *) istr.text);
|
||||
free (CONST_CAST (unsigned char *, istr.text));
|
||||
if (concats)
|
||||
{
|
||||
gcc_assert (locs);
|
||||
gcc_assert (g_string_concat_db);
|
||||
g_string_concat_db->record_string_concatenation (concats + 1, locs);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -1227,7 +1244,10 @@ lex_string (const cpp_token *tok, tree *valp, bool objc_string, bool translate)
|
||||
*valp = fix_string_type (value);
|
||||
|
||||
if (concats)
|
||||
obstack_free (&str_ob, 0);
|
||||
{
|
||||
obstack_free (&str_ob, 0);
|
||||
obstack_free (&loc_ob, 0);
|
||||
}
|
||||
|
||||
return objc_string ? CPP_OBJC_STRING : type;
|
||||
}
|
||||
|
@ -216,6 +216,9 @@ c_common_init_options (unsigned int decoded_options_count,
|
||||
unsigned int i;
|
||||
struct cpp_callbacks *cb;
|
||||
|
||||
g_string_concat_db
|
||||
= new (ggc_alloc <string_concat_db> ()) string_concat_db ();
|
||||
|
||||
parse_in = cpp_create_reader (c_dialect_cxx () ? CLK_GNUCXX: CLK_GNUC89,
|
||||
ident_hash, line_table);
|
||||
cb = cpp_get_callbacks (parse_in);
|
||||
|
1547
gcc/input.c
1547
gcc/input.c
File diff suppressed because it is too large
Load Diff
35
gcc/input.h
35
gcc/input.h
@ -95,4 +95,39 @@ void dump_location_info (FILE *stream);
|
||||
|
||||
void diagnostics_file_cache_fini (void);
|
||||
|
||||
struct GTY(()) string_concat
{
  /* Defined out of line; presumably initializes the two fields below
     from NUM and LOCS -- TODO confirm against input.c.  */
  string_concat (int num, location_t *locs);

  /* NOTE(review): looks like the number of entries in M_LOCS;
     confirm against the constructor.  */
  int m_num;

  /* Array of locations, marked "atomic" for the garbage collector.  */
  location_t * GTY ((atomic)) m_locs;
};
|
||||
|
||||
struct location_hash : int_hash <location_t, UNKNOWN_LOCATION> { };
|
||||
|
||||
class GTY(()) string_concat_db
{
 public:
  string_concat_db ();

  /* Record a concatenation described by NUM and LOCS.  Defined out of
     line; presumably NUM is the number of entries in LOCS -- TODO
     confirm against input.c.  */
  void record_string_concatenation (int num, location_t *locs);

  /* Look up a concatenation by LOC, reporting through *OUT_NUM and
     *OUT_LOCS.  NOTE(review): the bool result presumably signals
     whether an entry was found; confirm against input.c.  */
  bool get_string_concatenation (location_t loc,
				 int *out_num,
				 location_t **out_locs);

 private:
  static location_t get_key_loc (location_t loc);

  /* The garbage-collector routines generated in gtype-desc.c need to
     reach the private fields, hence these friend declarations.  */
  friend void ::gt_ggc_mx_string_concat_db (void *x_p);
  friend void ::gt_pch_nx_string_concat_db (void *x_p);
  friend void ::gt_pch_p_16string_concat_db (void *this_obj, void *x_p,
					     gt_pointer_operator op,
					     void *cookie);

  /* Map from a location key to the string_concat recorded for it.  */
  hash_map <location_hash, string_concat *> *m_table;
};
|
||||
|
||||
#endif
|
||||
|
30
gcc/substring-locations.h
Normal file
30
gcc/substring-locations.h
Normal file
@ -0,0 +1,30 @@
|
||||
/* Source locations within string literals.
|
||||
Copyright (C) 2016 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation; either version 3, or (at your option) any later
|
||||
version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with GCC; see the file COPYING3. If not see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef GCC_SUBSTRING_LOCATIONS_H
|
||||
#define GCC_SUBSTRING_LOCATIONS_H
|
||||
|
||||
extern const char *get_source_range_for_substring (cpp_reader *pfile,
|
||||
string_concat_db *concats,
|
||||
location_t strloc,
|
||||
enum cpp_ttype type,
|
||||
int start_idx, int end_idx,
|
||||
source_range *out_range);
|
||||
|
||||
#endif /* ! GCC_SUBSTRING_LOCATIONS_H */
|
@ -1,3 +1,10 @@
|
||||
2016-08-05 David Malcolm <dmalcolm@redhat.com>
|
||||
|
||||
* gcc.dg/plugin/diagnostic-test-string-literals-1.c: New file.
|
||||
* gcc.dg/plugin/diagnostic-test-string-literals-2.c: New file.
|
||||
* gcc.dg/plugin/diagnostic_plugin_test_string_literals.c: New file.
|
||||
* gcc.dg/plugin/plugin.exp (plugin_test_list): Add the above new files.
|
||||
|
||||
2016-08-05 Patrick Palka <ppalka@gcc.gnu.org>
|
||||
|
||||
PR tree-optimization/72810
|
||||
|
211
gcc/testsuite/gcc.dg/plugin/diagnostic-test-string-literals-1.c
Normal file
211
gcc/testsuite/gcc.dg/plugin/diagnostic-test-string-literals-1.c
Normal file
@ -0,0 +1,211 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O -fdiagnostics-show-caret" } */
|
||||
|
||||
/* This is a collection of unittests for ranges within string literals,
|
||||
using diagnostic_plugin_test_string_literals, which handles
|
||||
"__emit_string_literal_range" by generating a warning at the given
|
||||
subset of a string literal.
|
||||
|
||||
The indices are 0-based. It's easiest to verify things using string
|
||||
literals that are runs of 0-based digits (to avoid having to count
|
||||
characters).
|
||||
|
||||
LITERAL is a const void * to allow testing the various kinds of wide
|
||||
string literal, rather than just const char *. */
|
||||
|
||||
extern void __emit_string_literal_range (const void *literal,
|
||||
int start_idx, int end_idx);
|
||||
|
||||
void
|
||||
test_simple_string_literal (void)
|
||||
{
|
||||
__emit_string_literal_range ("0123456789", /* { dg-warning "range" } */
|
||||
6, 7);
|
||||
/* { dg-begin-multiline-output "" }
|
||||
__emit_string_literal_range ("0123456789",
|
||||
^~
|
||||
{ dg-end-multiline-output "" } */
|
||||
}
|
||||
|
||||
void
|
||||
test_concatenated_string_literal (void)
|
||||
{
|
||||
__emit_string_literal_range ("01234" "56789", /* { dg-warning "range" } */
|
||||
3, 6);
|
||||
/* { dg-begin-multiline-output "" }
|
||||
__emit_string_literal_range ("01234" "56789",
|
||||
^~~~~~~
|
||||
{ dg-end-multiline-output "" } */
|
||||
}
|
||||
|
||||
void
|
||||
test_multiline_string_literal (void)
|
||||
{
|
||||
__emit_string_literal_range ("01234" /* { dg-warning "range" } */
|
||||
"56789",
|
||||
3, 6);
|
||||
/* { dg-begin-multiline-output "" }
|
||||
__emit_string_literal_range ("01234"
|
||||
^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
"56789",
|
||||
~~~
|
||||
{ dg-end-multiline-output "" } */
|
||||
/* FIXME: why does the above need two trailing spaces? */
|
||||
}
|
||||
|
||||
/* Tests of various unicode encodings.
|
||||
|
||||
Digits 0 through 9 are unicode code points:
|
||||
U+0030 DIGIT ZERO
|
||||
...
|
||||
U+0039 DIGIT NINE
|
||||
However, these are not always valid as UCN (see the comment in
|
||||
libcpp/charset.c:_cpp_valid_ucn).
|
||||
|
||||
Hence we need to test UCN using an alternative unicode
|
||||
representation of numbers; let's use Roman numerals,
|
||||
(though these start at one, not zero):
|
||||
U+2170 SMALL ROMAN NUMERAL ONE
|
||||
...
|
||||
U+2174 SMALL ROMAN NUMERAL FIVE ("v")
|
||||
U+2175 SMALL ROMAN NUMERAL SIX ("vi")
|
||||
...
|
||||
U+2178 SMALL ROMAN NUMERAL NINE. */
|
||||
|
||||
void
|
||||
test_hex (void)
|
||||
{
|
||||
/* Digits 0-9, expressing digit 5 in ASCII as "\x35"
|
||||
and with a space in place of digit 6, to terminate the escaped
|
||||
hex code. */
|
||||
__emit_string_literal_range ("01234\x35 789", /* { dg-warning "range" } */
|
||||
3, 7);
|
||||
/* { dg-begin-multiline-output "" }
|
||||
__emit_string_literal_range ("01234\x35 789"
|
||||
^~~~~~~~
|
||||
{ dg-end-multiline-output "" } */
|
||||
}
|
||||
|
||||
void
|
||||
test_oct (void)
|
||||
{
|
||||
/* Digits 0-9, expressing digit 5 in ASCII as "\065"
|
||||
and with a space in place of digit 6, to terminate the escaped
|
||||
octal code. */
|
||||
__emit_string_literal_range ("01234\065 789", /* { dg-warning "range" } */
|
||||
3, 7);
|
||||
/* { dg-begin-multiline-output "" }
|
||||
__emit_string_literal_range ("01234\065 789"
|
||||
^~~~~~~~
|
||||
{ dg-end-multiline-output "" } */
|
||||
}
|
||||
|
||||
void
|
||||
test_multiple (void)
|
||||
{
|
||||
/* Digits 0-9, expressing digit 5 in ASCII as hex "\x35"
|
||||
digit 6 in ASCII as octal "\066", concatenating multiple strings. */
|
||||
__emit_string_literal_range ("01234" "\x35" "\066" "789", /* { dg-warning "range" } */
|
||||
3, 8);
|
||||
/* { dg-begin-multiline-output "" }
|
||||
__emit_string_literal_range ("01234" "\x35" "\066" "789",
|
||||
^~~~~~~~~~~~~~~~~~~~~~~~
|
||||
{ dg-end-multiline-output "" } */
|
||||
}
|
||||
|
||||
void
|
||||
test_ucn4 (void)
|
||||
{
|
||||
/* Digits 0-9, expressing digits 5 and 6 as Roman numerals expressed
|
||||
as UCN 4.
|
||||
The resulting string is encoded as UTF-8. Most of the digits are 1 byte
|
||||
each, but digits 5 and 6 are encoded with 3 bytes each.
|
||||
Hence to underline digits 4-7 we need to underline using bytes 4-11 in
|
||||
the UTF-8 encoding. */
|
||||
__emit_string_literal_range ("01234\u2174\u2175789", /* { dg-warning "range" } */
|
||||
4, 11);
|
||||
/* { dg-begin-multiline-output "" }
|
||||
__emit_string_literal_range ("01234\u2174\u2175789",
|
||||
^~~~~~~~~~~~~~
|
||||
{ dg-end-multiline-output "" } */
|
||||
}
|
||||
|
||||
void
|
||||
test_ucn8 (void)
|
||||
{
|
||||
/* Digits 0-9, expressing digits 5 and 6 as Roman numerals as UCN 8.
|
||||
The resulting string is the same as in test_ucn4 above, and hence
|
||||
has the same UTF-8 encoding, and so we again need to underline bytes
|
||||
4-11 in the UTF-8 encoding in order to underline digits 4-7. */
|
||||
__emit_string_literal_range ("01234\U00002174\U00002175789", /* { dg-warning "range" } */
|
||||
4, 11);
|
||||
/* { dg-begin-multiline-output "" }
|
||||
__emit_string_literal_range ("01234\U00002174\U00002175789",
|
||||
^~~~~~~~~~~~~~~~~~~~~~
|
||||
{ dg-end-multiline-output "" } */
|
||||
}
|
||||
|
||||
void
|
||||
test_u8 (void)
|
||||
{
|
||||
/* Digits 0-9. */
|
||||
__emit_string_literal_range (u8"0123456789", /* { dg-warning "range" } */
|
||||
4, 7);
|
||||
/* { dg-begin-multiline-output "" }
|
||||
__emit_string_literal_range (u8"0123456789",
|
||||
^~~~
|
||||
{ dg-end-multiline-output "" } */
|
||||
}
|
||||
|
||||
void
|
||||
test_u (void)
|
||||
{
|
||||
/* Digits 0-9. */
|
||||
__emit_string_literal_range (u"0123456789", /* { dg-error "unable to read substring range: execution character set != source character set" } */
|
||||
4, 7);
|
||||
/* { dg-begin-multiline-output "" }
|
||||
__emit_string_literal_range (u"0123456789",
|
||||
^~~~~~~~~~~~~
|
||||
{ dg-end-multiline-output "" } */
|
||||
}
|
||||
|
||||
void
|
||||
test_U (void)
|
||||
{
|
||||
/* Digits 0-9. */
|
||||
__emit_string_literal_range (U"0123456789", /* { dg-error "unable to read substring range: execution character set != source character set" } */
|
||||
4, 7);
|
||||
/* { dg-begin-multiline-output "" }
|
||||
__emit_string_literal_range (U"0123456789",
|
||||
^~~~~~~~~~~~~
|
||||
{ dg-end-multiline-output "" } */
|
||||
}
|
||||
|
||||
void
|
||||
test_L (void)
|
||||
{
|
||||
/* Digits 0-9. */
|
||||
__emit_string_literal_range (L"0123456789", /* { dg-error "unable to read substring range: execution character set != source character set" } */
|
||||
4, 7);
|
||||
/* { dg-begin-multiline-output "" }
|
||||
__emit_string_literal_range (L"0123456789",
|
||||
^~~~~~~~~~~~~
|
||||
{ dg-end-multiline-output "" } */
|
||||
}
|
||||
|
||||
void
|
||||
test_macro (void)
|
||||
{
|
||||
#define START "01234" /* { dg-warning "range" } */
|
||||
__emit_string_literal_range (START
|
||||
"56789",
|
||||
3, 6);
|
||||
/* { dg-begin-multiline-output "" }
|
||||
#define START "01234"
|
||||
^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
__emit_string_literal_range (START
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
"56789",
|
||||
~~~
|
||||
{ dg-end-multiline-output "" } */
|
||||
}
|
@ -0,0 +1,53 @@
|
||||
/* { dg-do compile } */
|
||||
|
||||
/* See the notes in diagnostic-test-string-literals-1.c.
|
||||
This test case has caret-printing disabled. */
|
||||
|
||||
extern void __emit_string_literal_range (const void *literal,
|
||||
int start_idx, int end_idx);
|
||||
/* Test of a stringified macro argument, by itself. */
|
||||
|
||||
void
|
||||
test_stringified_token_1 (int x)
|
||||
{
|
||||
#define STRINGIFY(EXPR) #EXPR
|
||||
|
||||
__emit_string_literal_range (STRINGIFY(x > 0), /* { dg-error "unable to read substring range: macro expansion" } */
|
||||
0, 4);
|
||||
|
||||
#undef STRINGIFY
|
||||
}
|
||||
|
||||
/* Test of a stringified token within a concatenation. */
|
||||
|
||||
void
|
||||
test_stringized_token_2 (int x)
|
||||
{
|
||||
#define EXAMPLE(EXPR, START_IDX, END_IDX) \
|
||||
do { \
|
||||
__emit_string_literal_range (" before " #EXPR " after \n", \
|
||||
START_IDX, END_IDX); \
|
||||
} while (0)
|
||||
|
||||
EXAMPLE(x > 0, 1, 6);
|
||||
/* { dg-error "unable to read substring range: cpp_interpret_string_1 failed" "" { target *-*-* } 28 } */
|
||||
|
||||
#undef EXAMPLE
|
||||
}
|
||||
|
||||
/* Test of a doubly-stringified macro argument (by itself). */
|
||||
|
||||
void
|
||||
test_stringified_token_3 (int x)
|
||||
{
|
||||
#define XSTR(s) STR(s)
|
||||
#define STR(s) #s
|
||||
#define FOO 123456789
|
||||
__emit_string_literal_range (XSTR (FOO), /* { dg-error "unable to read substring range: macro expansion" } */
|
||||
2, 3);
|
||||
|
||||
#undef XSTR
|
||||
#undef STR
|
||||
#undef FOO
|
||||
}
|
||||
|
@ -0,0 +1,212 @@
|
||||
/* This plugin uses the diagnostics code to verify tracking of source code
|
||||
locations within string literals. */
|
||||
/* { dg-options "-O" } */
|
||||
|
||||
#include "gcc-plugin.h"
|
||||
#include "config.h"
|
||||
#include "system.h"
|
||||
#include "coretypes.h"
|
||||
#include "tm.h"
|
||||
#include "tree.h"
|
||||
#include "stringpool.h"
|
||||
#include "toplev.h"
|
||||
#include "basic-block.h"
|
||||
#include "hash-table.h"
|
||||
#include "vec.h"
|
||||
#include "ggc.h"
|
||||
#include "basic-block.h"
|
||||
#include "tree-ssa-alias.h"
|
||||
#include "internal-fn.h"
|
||||
#include "gimple-fold.h"
|
||||
#include "tree-eh.h"
|
||||
#include "gimple-expr.h"
|
||||
#include "is-a.h"
|
||||
#include "gimple.h"
|
||||
#include "gimple-iterator.h"
|
||||
#include "tree.h"
|
||||
#include "tree-pass.h"
|
||||
#include "intl.h"
|
||||
#include "plugin-version.h"
|
||||
#include "c-family/c-common.h"
|
||||
#include "diagnostic.h"
|
||||
#include "context.h"
|
||||
#include "print-tree.h"
|
||||
#include "cpplib.h"
|
||||
#include "c-family/c-pragma.h"
|
||||
|
||||
int plugin_is_GPL_compatible;
|
||||
|
||||
/* A custom pass for printing string literal location information. */
|
||||
|
||||
const pass_data pass_data_test_string_literals =
{
  /* type */
  GIMPLE_PASS,
  /* name */
  "test_string_literals",
  /* optinfo_flags */
  OPTGROUP_NONE,
  /* tv_id */
  TV_NONE,
  /* properties_required */
  PROP_ssa,
  /* properties_provided */
  0,
  /* properties_destroyed */
  0,
  /* todo_flags_start */
  0,
  /* todo_flags_finish */
  0,
};
|
||||
|
||||
class pass_test_string_literals : public gimple_opt_pass
|
||||
{
|
||||
public:
|
||||
pass_test_string_literals(gcc::context *ctxt)
|
||||
: gimple_opt_pass(pass_data_test_string_literals, ctxt)
|
||||
{}
|
||||
|
||||
/* opt_pass methods: */
|
||||
bool gate (function *) { return true; }
|
||||
virtual unsigned int execute (function *);
|
||||
|
||||
}; // class pass_test_string_literals
|
||||
|
||||
/* Determine if STMT is a call with NUM_ARGS arguments to a function
|
||||
named FUNCNAME.
|
||||
If so, return STMT as a gcall *. Otherwise return NULL. */
|
||||
|
||||
static gcall *
check_for_named_call (gimple *stmt,
		      const char *funcname, unsigned int num_args)
{
  gcc_assert (funcname);

  /* Anything that is not a call statement is silently ignored.  */
  gcall *call = dyn_cast <gcall *> (stmt);
  if (!call)
    return NULL;

  /* Calls for which no function declaration is available cannot be
     matched by name.  */
  tree fndecl = gimple_call_fndecl (call);
  if (!fndecl)
    return NULL;

  /* strcmp returns nonzero when the names differ.  */
  if (strcmp (IDENTIFIER_POINTER (DECL_NAME (fndecl)), funcname))
    return NULL;

  /* The name matched, so a wrong argument count is reported as an
     error at the statement rather than silently ignored.  */
  if (gimple_call_num_args (call) != num_args)
    {
      /* Both counts are unsigned, hence %u rather than %i.  */
      error_at (stmt->location, "expected number of args: %u (got %u)",
		num_args, gimple_call_num_args (call));
      return NULL;
    }

  return call;
}
|
||||
|
||||
/* Emit a warning covering SRC_RANGE, with the caret at the start of
|
||||
SRC_RANGE. */
|
||||
|
||||
static void
|
||||
emit_warning (source_range src_range)
|
||||
{
|
||||
location_t loc
|
||||
= make_location (src_range.m_start, src_range.m_start, src_range.m_finish);
|
||||
warning_at (loc, 0, "range %i:%i-%i:%i",
|
||||
LOCATION_LINE (src_range.m_start),
|
||||
LOCATION_COLUMN (src_range.m_start),
|
||||
LOCATION_LINE (src_range.m_finish),
|
||||
LOCATION_COLUMN (src_range.m_finish));
|
||||
}
|
||||
|
||||
/* Support code for verifying that we are correctly tracking ranges
|
||||
within string literals, for use by diagnostic-test-string-literals-*.c.
|
||||
Emit a warning showing the range of a string literal, for each call to
|
||||
a function named "__emit_string_literal_range".
|
||||
The initial argument should be a string literal; arguments 2 and 3
|
||||
should be integer constants, giving the range within the string
|
||||
to be printed. */
|
||||
|
||||
static void
|
||||
test_string_literals (gimple *stmt)
|
||||
{
|
||||
gcall *call = check_for_named_call (stmt, "__emit_string_literal_range", 3);
|
||||
if (!call)
|
||||
return;
|
||||
|
||||
/* We expect an ADDR_EXPR with a STRING_CST inside it for the
|
||||
initial arg. */
|
||||
tree t_addr_string = gimple_call_arg (call, 0);
|
||||
if (TREE_CODE (t_addr_string) != ADDR_EXPR)
|
||||
{
|
||||
error_at (call->location, "string literal required for arg 1");
|
||||
return;
|
||||
}
|
||||
|
||||
tree t_string = TREE_OPERAND (t_addr_string, 0);
|
||||
if (TREE_CODE (t_string) != STRING_CST)
|
||||
{
|
||||
error_at (call->location, "string literal required for arg 1");
|
||||
return;
|
||||
}
|
||||
|
||||
tree t_start_idx = gimple_call_arg (call, 1);
|
||||
if (TREE_CODE (t_start_idx) != INTEGER_CST)
|
||||
{
|
||||
error_at (call->location, "integer constant required for arg 2");
|
||||
return;
|
||||
}
|
||||
int start_idx = TREE_INT_CST_LOW (t_start_idx);
|
||||
|
||||
tree t_end_idx = gimple_call_arg (call, 2);
|
||||
if (TREE_CODE (t_end_idx) != INTEGER_CST)
|
||||
{
|
||||
error_at (call->location, "integer constant required for arg 3");
|
||||
return;
|
||||
}
|
||||
int end_idx = TREE_INT_CST_LOW (t_end_idx);
|
||||
|
||||
/* A STRING_CST doesn't have a location, but the ADDR_EXPR does. */
|
||||
location_t strloc = EXPR_LOCATION (t_addr_string);
|
||||
source_range src_range;
|
||||
substring_loc substr_loc (strloc, TREE_TYPE (t_string),
|
||||
start_idx, end_idx);
|
||||
const char *err = substr_loc.get_range (&src_range);
|
||||
if (err)
|
||||
error_at (strloc, "unable to read substring range: %s", err);
|
||||
else
|
||||
emit_warning (src_range);
|
||||
}
|
||||
|
||||
/* Call test_string_literals on every statement within FUN. */
|
||||
|
||||
unsigned int
|
||||
pass_test_string_literals::execute (function *fun)
|
||||
{
|
||||
gimple_stmt_iterator gsi;
|
||||
basic_block bb;
|
||||
|
||||
FOR_EACH_BB_FN (bb, fun)
|
||||
for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
|
||||
{
|
||||
gimple *stmt = gsi_stmt (gsi);
|
||||
test_string_literals (stmt);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Entrypoint for the plugin. Create and register the custom pass. */
|
||||
|
||||
int
|
||||
plugin_init (struct plugin_name_args *plugin_info,
|
||||
struct plugin_gcc_version *version)
|
||||
{
|
||||
struct register_pass_info pass_info;
|
||||
const char *plugin_name = plugin_info->base_name;
|
||||
int argc = plugin_info->argc;
|
||||
struct plugin_argument *argv = plugin_info->argv;
|
||||
|
||||
if (!plugin_default_version_check (version, &gcc_version))
|
||||
return 1;
|
||||
|
||||
pass_info.pass = new pass_test_string_literals (g);
|
||||
pass_info.reference_pass_name = "ssa";
|
||||
pass_info.ref_pass_instance_number = 1;
|
||||
pass_info.pos_op = PASS_POS_INSERT_AFTER;
|
||||
register_callback (plugin_name, PLUGIN_PASS_MANAGER_SETUP, NULL,
|
||||
&pass_info);
|
||||
|
||||
return 0;
|
||||
}
|
@ -70,6 +70,9 @@ set plugin_test_list [list \
|
||||
diagnostic-test-expressions-1.c } \
|
||||
{ diagnostic_plugin_show_trees.c \
|
||||
diagnostic-test-show-trees-1.c } \
|
||||
{ diagnostic_plugin_test_string_literals.c \
|
||||
diagnostic-test-string-literals-1.c \
|
||||
diagnostic-test-string-literals-2.c } \
|
||||
{ location_overflow_plugin.c \
|
||||
location-overflow-test-1.c \
|
||||
location-overflow-test-2.c } \
|
||||
|
@ -1,3 +1,49 @@
|
||||
2016-08-05 David Malcolm <dmalcolm@redhat.com>
|
||||
|
||||
* charset.c (cpp_substring_ranges::cpp_substring_ranges): New
|
||||
constructor.
|
||||
(cpp_substring_ranges::~cpp_substring_ranges): New destructor.
|
||||
(cpp_substring_ranges::add_range): New method.
|
||||
(cpp_substring_ranges::add_n_ranges): New method.
|
||||
(_cpp_valid_ucn): Add "char_range" and "loc_reader" params; if
|
||||
they are non-NULL, read position information from *loc_reader
|
||||
and update char_range->m_finish accordingly.
|
||||
(convert_ucn): Add "char_range", "loc_reader", and "ranges"
|
||||
params. If loc_reader is non-NULL, read location information from
|
||||
it, and update *ranges accordingly, using char_range.
|
||||
Conditionalize the conversion into tbuf on tbuf being non-NULL.
|
||||
(convert_hex): Likewise, conditionalizing the call to
|
||||
emit_numeric_escape on tbuf.
|
||||
(convert_oct): Likewise.
|
||||
(convert_escape): Add params "loc_reader" and "ranges". If
|
||||
loc_reader is non-NULL, read location information from it, and
|
||||
update *ranges accordingly. Conditionalize the conversion into
|
||||
tbuf on tbuf being non-NULL.
|
||||
(cpp_interpret_string): Rename to...
|
||||
(cpp_interpret_string_1): ...this, adding params "loc_readers" and
|
||||
"out". Use "to" to conditionalize the initialization and usage of
|
||||
"tbuf", such as running the converter. If "loc_readers" is
|
||||
non-NULL, use the instances within it, reading location
|
||||
information from them, and passing them to convert_escape; likewise
|
||||
write to "out" if loc_readers is non-NULL. Check for leading
|
||||
quote and issue an error if it is not present. Update boundary
|
||||
check from "== limit" to ">= limit" to protect against erroneous
|
||||
location values to calls that are not parsing string literals.
|
||||
(cpp_interpret_string): Reimplement in terms of
|
||||
cpp_interpret_string_1.
|
||||
(noop_error_cb): New function.
|
||||
(cpp_interpret_string_ranges): New function.
|
||||
(cpp_string_location_reader::cpp_string_location_reader): New
|
||||
constructor.
|
||||
(cpp_string_location_reader::get_next): New method.
|
||||
* include/cpplib.h (class cpp_string_location_reader): New class.
|
||||
(class cpp_substring_ranges): New class.
|
||||
(cpp_interpret_string_ranges): New prototype.
|
||||
* internal.h (_cpp_valid_ucn): Add params "char_range" and
|
||||
"loc_reader".
|
||||
* lex.c (forms_identifier_p): Pass NULL for new params to
|
||||
_cpp_valid_ucn.
|
||||
|
||||
2016-08-01 Andreas Schwab <schwab@suse.de>
|
||||
|
||||
* include/cpplib.h: Fix comment typo.
|
||||
|
432
libcpp/charset.c
432
libcpp/charset.c
@ -812,6 +812,51 @@ cpp_host_to_exec_charset (cpp_reader *pfile, cppchar_t c)
|
||||
|
||||
|
||||
|
||||
/* cpp_substring_ranges's constructor. */
|
||||
|
||||
cpp_substring_ranges::cpp_substring_ranges () :
|
||||
m_ranges (NULL),
|
||||
m_num_ranges (0),
|
||||
m_alloc_ranges (8)
|
||||
{
|
||||
m_ranges = XNEWVEC (source_range, m_alloc_ranges);
|
||||
}
|
||||
|
||||
/* cpp_substring_ranges's destructor. */
|
||||
|
||||
cpp_substring_ranges::~cpp_substring_ranges ()
|
||||
{
|
||||
free (m_ranges);
|
||||
}
|
||||
|
||||
/* Add RANGE to the vector of source_range information. */
|
||||
|
||||
void
|
||||
cpp_substring_ranges::add_range (source_range range)
|
||||
{
|
||||
if (m_num_ranges >= m_alloc_ranges)
|
||||
{
|
||||
m_alloc_ranges *= 2;
|
||||
m_ranges
|
||||
= (source_range *)xrealloc (m_ranges,
|
||||
sizeof (source_range) * m_alloc_ranges);
|
||||
}
|
||||
m_ranges[m_num_ranges++] = range;
|
||||
}
|
||||
|
||||
/* Read NUM ranges from LOC_READER, adding them to the vector of source_range
|
||||
information. */
|
||||
|
||||
void
|
||||
cpp_substring_ranges::add_n_ranges (int num,
|
||||
cpp_string_location_reader &loc_reader)
|
||||
{
|
||||
for (int i = 0; i < num; i++)
|
||||
add_range (loc_reader.get_next ());
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* Utility routine that computes a mask of the form 0000...111... with
|
||||
WIDTH 1-bits. */
|
||||
static inline size_t
|
||||
@ -980,18 +1025,27 @@ ucn_valid_in_identifier (cpp_reader *pfile, cppchar_t c,
|
||||
one beyond the UCN, or to the syntactically invalid character.
|
||||
|
||||
IDENTIFIER_POS is 0 when not in an identifier, 1 for the start of
|
||||
an identifier, or 2 otherwise. */
|
||||
an identifier, or 2 otherwise.
|
||||
|
||||
If CHAR_RANGE and LOC_READER are non-NULL, then position information is
|
||||
read from *LOC_READER and CHAR_RANGE->m_finish is updated accordingly. */
|
||||
|
||||
bool
|
||||
_cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr,
|
||||
const uchar *limit, int identifier_pos,
|
||||
struct normalize_state *nst, cppchar_t *cp)
|
||||
struct normalize_state *nst, cppchar_t *cp,
|
||||
source_range *char_range,
|
||||
cpp_string_location_reader *loc_reader)
|
||||
{
|
||||
cppchar_t result, c;
|
||||
unsigned int length;
|
||||
const uchar *str = *pstr;
|
||||
const uchar *base = str - 2;
|
||||
|
||||
/* char_range and loc_reader must either be both NULL, or both be
|
||||
non-NULL. */
|
||||
gcc_assert ((char_range != NULL) == (loc_reader != NULL));
|
||||
|
||||
if (!CPP_OPTION (pfile, cplusplus) && !CPP_OPTION (pfile, c99))
|
||||
cpp_error (pfile, CPP_DL_WARNING,
|
||||
"universal character names are only valid in C++ and C99");
|
||||
@ -1021,6 +1075,8 @@ _cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr,
|
||||
if (!ISXDIGIT (c))
|
||||
break;
|
||||
str++;
|
||||
if (loc_reader)
|
||||
char_range->m_finish = loc_reader->get_next ().m_finish;
|
||||
result = (result << 4) + hex_value (c);
|
||||
}
|
||||
while (--length && str < limit);
|
||||
@ -1086,11 +1142,18 @@ _cpp_valid_ucn (cpp_reader *pfile, const uchar **pstr,
|
||||
}
|
||||
|
||||
/* Convert an UCN, pointed to by FROM, to UTF-8 encoding, then translate
|
||||
it to the execution character set and write the result into TBUF.
|
||||
An advanced pointer is returned. Issues all relevant diagnostics. */
|
||||
it to the execution character set and write the result into TBUF,
|
||||
if TBUF is non-NULL.
|
||||
An advanced pointer is returned. Issues all relevant diagnostics.
|
||||
If LOC_READER is non-NULL, then RANGES must be non-NULL and CHAR_RANGE
|
||||
contains the location of the character so far: location information
|
||||
is read from *LOC_READER, and *RANGES is updated accordingly. */
|
||||
static const uchar *
|
||||
convert_ucn (cpp_reader *pfile, const uchar *from, const uchar *limit,
|
||||
struct _cpp_strbuf *tbuf, struct cset_converter cvt)
|
||||
struct _cpp_strbuf *tbuf, struct cset_converter cvt,
|
||||
source_range char_range,
|
||||
cpp_string_location_reader *loc_reader,
|
||||
cpp_substring_ranges *ranges)
|
||||
{
|
||||
cppchar_t ucn;
|
||||
uchar buf[6];
|
||||
@ -1099,8 +1162,17 @@ convert_ucn (cpp_reader *pfile, const uchar *from, const uchar *limit,
|
||||
int rval;
|
||||
struct normalize_state nst = INITIAL_NORMALIZE_STATE;
|
||||
|
||||
/* loc_reader and ranges must either be both NULL, or both be non-NULL. */
|
||||
gcc_assert ((loc_reader != NULL) == (ranges != NULL));
|
||||
|
||||
from++; /* Skip u/U. */
|
||||
_cpp_valid_ucn (pfile, &from, limit, 0, &nst, &ucn);
|
||||
|
||||
if (loc_reader)
|
||||
/* The u/U is part of the spelling of this character. */
|
||||
char_range.m_finish = loc_reader->get_next ().m_finish;
|
||||
|
||||
_cpp_valid_ucn (pfile, &from, limit, 0, &nst,
|
||||
&ucn, &char_range, loc_reader);
|
||||
|
||||
rval = one_cppchar_to_utf8 (ucn, &bufp, &bytesleft);
|
||||
if (rval)
|
||||
@ -1109,9 +1181,20 @@ convert_ucn (cpp_reader *pfile, const uchar *from, const uchar *limit,
|
||||
cpp_errno (pfile, CPP_DL_ERROR,
|
||||
"converting UCN to source character set");
|
||||
}
|
||||
else if (!APPLY_CONVERSION (cvt, buf, 6 - bytesleft, tbuf))
|
||||
cpp_errno (pfile, CPP_DL_ERROR,
|
||||
"converting UCN to execution character set");
|
||||
else
|
||||
{
|
||||
if (tbuf)
|
||||
if (!APPLY_CONVERSION (cvt, buf, 6 - bytesleft, tbuf))
|
||||
cpp_errno (pfile, CPP_DL_ERROR,
|
||||
"converting UCN to execution character set");
|
||||
|
||||
if (loc_reader)
|
||||
{
|
||||
int num_encoded_bytes = 6 - bytesleft;
|
||||
for (int i = 0; i < num_encoded_bytes; i++)
|
||||
ranges->add_range (char_range);
|
||||
}
|
||||
}
|
||||
|
||||
return from;
|
||||
}
|
||||
@ -1167,31 +1250,48 @@ emit_numeric_escape (cpp_reader *pfile, cppchar_t n,
|
||||
}
|
||||
|
||||
/* Convert a hexadecimal escape, pointed to by FROM, to the execution
|
||||
character set and write it into the string buffer TBUF. Returns an
|
||||
advanced pointer, and issues diagnostics as necessary.
|
||||
character set and write it into the string buffer TBUF (if non-NULL).
|
||||
Returns an advanced pointer, and issues diagnostics as necessary.
|
||||
No character set translation occurs; this routine always produces the
|
||||
execution-set character with numeric value equal to the given hex
|
||||
number. You can, e.g. generate surrogate pairs this way. */
|
||||
number. You can, e.g. generate surrogate pairs this way.
|
||||
If LOC_READER is non-NULL, then RANGES must be non-NULL and CHAR_RANGE
|
||||
contains the location of the character so far: location information
|
||||
is read from *LOC_READER, and *RANGES is updated accordingly. */
|
||||
static const uchar *
|
||||
convert_hex (cpp_reader *pfile, const uchar *from, const uchar *limit,
|
||||
struct _cpp_strbuf *tbuf, struct cset_converter cvt)
|
||||
struct _cpp_strbuf *tbuf, struct cset_converter cvt,
|
||||
source_range char_range,
|
||||
cpp_string_location_reader *loc_reader,
|
||||
cpp_substring_ranges *ranges)
|
||||
{
|
||||
cppchar_t c, n = 0, overflow = 0;
|
||||
int digits_found = 0;
|
||||
size_t width = cvt.width;
|
||||
size_t mask = width_to_mask (width);
|
||||
|
||||
/* loc_reader and ranges must either be both NULL, or both be non-NULL. */
|
||||
gcc_assert ((loc_reader != NULL) == (ranges != NULL));
|
||||
|
||||
if (CPP_WTRADITIONAL (pfile))
|
||||
cpp_warning (pfile, CPP_W_TRADITIONAL,
|
||||
"the meaning of '\\x' is different in traditional C");
|
||||
|
||||
from++; /* Skip 'x'. */
|
||||
/* Skip 'x'. */
|
||||
from++;
|
||||
|
||||
/* The 'x' is part of the spelling of this character. */
|
||||
if (loc_reader)
|
||||
char_range.m_finish = loc_reader->get_next ().m_finish;
|
||||
|
||||
while (from < limit)
|
||||
{
|
||||
c = *from;
|
||||
if (! hex_p (c))
|
||||
break;
|
||||
from++;
|
||||
if (loc_reader)
|
||||
char_range.m_finish = loc_reader->get_next ().m_finish;
|
||||
overflow |= n ^ (n << 4 >> 4);
|
||||
n = (n << 4) + hex_value (c);
|
||||
digits_found = 1;
|
||||
@ -1211,7 +1311,10 @@ convert_hex (cpp_reader *pfile, const uchar *from, const uchar *limit,
|
||||
n &= mask;
|
||||
}
|
||||
|
||||
emit_numeric_escape (pfile, n, tbuf, cvt);
|
||||
if (tbuf)
|
||||
emit_numeric_escape (pfile, n, tbuf, cvt);
|
||||
if (ranges)
|
||||
ranges->add_range (char_range);
|
||||
|
||||
return from;
|
||||
}
|
||||
@ -1221,10 +1324,16 @@ convert_hex (cpp_reader *pfile, const uchar *from, const uchar *limit,
|
||||
advanced pointer, and issues diagnostics as necessary.
|
||||
No character set translation occurs; this routine always produces the
|
||||
execution-set character with numeric value equal to the given octal
|
||||
number. */
|
||||
number.
|
||||
If LOC_READER is non-NULL, then RANGES must be non-NULL and CHAR_RANGE
|
||||
contains the location of the character so far: location information
|
||||
is read from *LOC_READER, and *RANGES is updated accordingly. */
|
||||
static const uchar *
|
||||
convert_oct (cpp_reader *pfile, const uchar *from, const uchar *limit,
|
||||
struct _cpp_strbuf *tbuf, struct cset_converter cvt)
|
||||
struct _cpp_strbuf *tbuf, struct cset_converter cvt,
|
||||
source_range char_range,
|
||||
cpp_string_location_reader *loc_reader,
|
||||
cpp_substring_ranges *ranges)
|
||||
{
|
||||
size_t count = 0;
|
||||
cppchar_t c, n = 0;
|
||||
@ -1232,12 +1341,17 @@ convert_oct (cpp_reader *pfile, const uchar *from, const uchar *limit,
|
||||
size_t mask = width_to_mask (width);
|
||||
bool overflow = false;
|
||||
|
||||
/* loc_reader and ranges must either be both NULL, or both be non-NULL. */
|
||||
gcc_assert ((loc_reader != NULL) == (ranges != NULL));
|
||||
|
||||
while (from < limit && count++ < 3)
|
||||
{
|
||||
c = *from;
|
||||
if (c < '0' || c > '7')
|
||||
break;
|
||||
from++;
|
||||
if (loc_reader)
|
||||
char_range.m_finish = loc_reader->get_next ().m_finish;
|
||||
overflow |= n ^ (n << 3 >> 3);
|
||||
n = (n << 3) + c - '0';
|
||||
}
|
||||
@ -1249,18 +1363,26 @@ convert_oct (cpp_reader *pfile, const uchar *from, const uchar *limit,
|
||||
n &= mask;
|
||||
}
|
||||
|
||||
emit_numeric_escape (pfile, n, tbuf, cvt);
|
||||
if (tbuf)
|
||||
emit_numeric_escape (pfile, n, tbuf, cvt);
|
||||
if (ranges)
|
||||
ranges->add_range (char_range);
|
||||
|
||||
return from;
|
||||
}
|
||||
|
||||
/* Convert an escape sequence (pointed to by FROM) to its value on
|
||||
the target, and to the execution character set. Do not scan past
|
||||
LIMIT. Write the converted value into TBUF. Returns an advanced
|
||||
pointer. Handles all relevant diagnostics. */
|
||||
LIMIT. Write the converted value into TBUF, if TBUF is non-NULL.
|
||||
Returns an advanced pointer. Handles all relevant diagnostics.
|
||||
If LOC_READER is non-NULL, then RANGES must be non-NULL: location
|
||||
information is read from *LOC_READER, and *RANGES is updated
|
||||
accordingly. */
|
||||
static const uchar *
|
||||
convert_escape (cpp_reader *pfile, const uchar *from, const uchar *limit,
|
||||
struct _cpp_strbuf *tbuf, struct cset_converter cvt)
|
||||
struct _cpp_strbuf *tbuf, struct cset_converter cvt,
|
||||
cpp_string_location_reader *loc_reader,
|
||||
cpp_substring_ranges *ranges)
|
||||
{
|
||||
/* Values of \a \b \e \f \n \r \t \v respectively. */
|
||||
#if HOST_CHARSET == HOST_CHARSET_ASCII
|
||||
@ -1273,20 +1395,28 @@ convert_escape (cpp_reader *pfile, const uchar *from, const uchar *limit,
|
||||
|
||||
uchar c;
|
||||
|
||||
/* Record the location of the backslash. */
|
||||
source_range char_range;
|
||||
if (loc_reader)
|
||||
char_range = loc_reader->get_next ();
|
||||
|
||||
c = *from;
|
||||
switch (c)
|
||||
{
|
||||
/* UCNs, hex escapes, and octal escapes are processed separately. */
|
||||
case 'u': case 'U':
|
||||
return convert_ucn (pfile, from, limit, tbuf, cvt);
|
||||
return convert_ucn (pfile, from, limit, tbuf, cvt,
|
||||
char_range, loc_reader, ranges);
|
||||
|
||||
case 'x':
|
||||
return convert_hex (pfile, from, limit, tbuf, cvt);
|
||||
return convert_hex (pfile, from, limit, tbuf, cvt,
|
||||
char_range, loc_reader, ranges);
|
||||
break;
|
||||
|
||||
case '0': case '1': case '2': case '3':
|
||||
case '4': case '5': case '6': case '7':
|
||||
return convert_oct (pfile, from, limit, tbuf, cvt);
|
||||
return convert_oct (pfile, from, limit, tbuf, cvt,
|
||||
char_range, loc_reader, ranges);
|
||||
|
||||
/* Various letter escapes. Get the appropriate host-charset
|
||||
value into C. */
|
||||
@ -1338,10 +1468,17 @@ convert_escape (cpp_reader *pfile, const uchar *from, const uchar *limit,
|
||||
}
|
||||
}
|
||||
|
||||
/* Now convert what we have to the execution character set. */
|
||||
if (!APPLY_CONVERSION (cvt, &c, 1, tbuf))
|
||||
cpp_errno (pfile, CPP_DL_ERROR,
|
||||
"converting escape sequence to execution character set");
|
||||
if (tbuf)
|
||||
/* Now convert what we have to the execution character set. */
|
||||
if (!APPLY_CONVERSION (cvt, &c, 1, tbuf))
|
||||
cpp_errno (pfile, CPP_DL_ERROR,
|
||||
"converting escape sequence to execution character set");
|
||||
|
||||
if (loc_reader)
|
||||
{
|
||||
char_range.m_finish = loc_reader->get_next ().m_finish;
|
||||
ranges->add_range (char_range);
|
||||
}
|
||||
|
||||
return from + 1;
|
||||
}
|
||||
@ -1374,28 +1511,52 @@ converter_for_type (cpp_reader *pfile, enum cpp_ttype type)
|
||||
are to be converted from the source to the execution character set,
|
||||
escape sequences translated, and finally all are to be
|
||||
concatenated. WIDE indicates whether or not to produce a wide
|
||||
string. The result is written into TO. Returns true for success,
|
||||
false for failure. */
|
||||
bool
|
||||
cpp_interpret_string (cpp_reader *pfile, const cpp_string *from, size_t count,
|
||||
cpp_string *to, enum cpp_ttype type)
|
||||
string. If TO is non-NULL, the result is written into TO.
|
||||
If LOC_READERS and OUT are non-NULL, then location information
|
||||
is read from LOC_READERS (which must be an array of length COUNT),
|
||||
and location information is written to *OUT.
|
||||
|
||||
Returns true for success, false for failure. */
|
||||
|
||||
static bool
|
||||
cpp_interpret_string_1 (cpp_reader *pfile, const cpp_string *from, size_t count,
|
||||
cpp_string *to, enum cpp_ttype type,
|
||||
cpp_string_location_reader *loc_readers,
|
||||
cpp_substring_ranges *out)
|
||||
{
|
||||
struct _cpp_strbuf tbuf;
|
||||
const uchar *p, *base, *limit;
|
||||
size_t i;
|
||||
struct cset_converter cvt = converter_for_type (pfile, type);
|
||||
|
||||
tbuf.asize = MAX (OUTBUF_BLOCK_SIZE, from->len);
|
||||
tbuf.text = XNEWVEC (uchar, tbuf.asize);
|
||||
tbuf.len = 0;
|
||||
/* loc_readers and out must either be both NULL, or both be non-NULL. */
|
||||
gcc_assert ((loc_readers != NULL) == (out != NULL));
|
||||
|
||||
if (to)
|
||||
{
|
||||
tbuf.asize = MAX (OUTBUF_BLOCK_SIZE, from->len);
|
||||
tbuf.text = XNEWVEC (uchar, tbuf.asize);
|
||||
tbuf.len = 0;
|
||||
}
|
||||
|
||||
for (i = 0; i < count; i++)
|
||||
{
|
||||
cpp_string_location_reader *loc_reader = NULL;
|
||||
if (loc_readers)
|
||||
loc_reader = &loc_readers[i];
|
||||
|
||||
p = from[i].text;
|
||||
if (*p == 'u')
|
||||
{
|
||||
if (*++p == '8')
|
||||
p++;
|
||||
p++;
|
||||
if (loc_reader)
|
||||
loc_reader->get_next ();
|
||||
if (*p == '8')
|
||||
{
|
||||
p++;
|
||||
if (loc_reader)
|
||||
loc_reader->get_next ();
|
||||
}
|
||||
}
|
||||
else if (*p == 'L' || *p == 'U') p++;
|
||||
if (*p == 'R')
|
||||
@ -1414,13 +1575,43 @@ cpp_interpret_string (cpp_reader *pfile, const cpp_string *from, size_t count,
|
||||
|
||||
/* Raw strings are all normal characters; these can be fed
|
||||
directly to convert_cset. */
|
||||
if (!APPLY_CONVERSION (cvt, p, limit - p, &tbuf))
|
||||
goto fail;
|
||||
if (to)
|
||||
if (!APPLY_CONVERSION (cvt, p, limit - p, &tbuf))
|
||||
goto fail;
|
||||
|
||||
if (loc_reader)
|
||||
{
|
||||
/* If generating source ranges, assume we have a 1:1
|
||||
correspondence between bytes in the source encoding and bytes
|
||||
in the execution encoding (e.g. if we have a UTF-8 to UTF-8
|
||||
conversion), so that this run of bytes in the source file
|
||||
corresponds to a run of bytes in the execution string.
|
||||
This requirement is guaranteed by an early-reject in
|
||||
cpp_interpret_string_ranges. */
|
||||
gcc_assert (cvt.func == convert_no_conversion);
|
||||
out->add_n_ranges (limit - p, *loc_reader);
|
||||
}
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
p++; /* Skip leading quote. */
|
||||
/* If we don't now have a leading quote, something has gone wrong.
|
||||
This can occur if cpp_interpret_string_ranges is handling a
|
||||
stringified macro argument, but should not be possible otherwise. */
|
||||
if (*p != '"' && *p != '\'')
|
||||
{
|
||||
gcc_assert (out != NULL);
|
||||
cpp_error (pfile, CPP_DL_ERROR, "missing open quote");
|
||||
if (to)
|
||||
free (tbuf.text);
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Skip leading quote. */
|
||||
p++;
|
||||
if (loc_reader)
|
||||
loc_reader->get_next ();
|
||||
|
||||
limit = from[i].text + from[i].len - 1; /* Skip trailing quote. */
|
||||
|
||||
for (;;)
|
||||
@ -1432,29 +1623,130 @@ cpp_interpret_string (cpp_reader *pfile, const cpp_string *from, size_t count,
|
||||
{
|
||||
/* We have a run of normal characters; these can be fed
|
||||
directly to convert_cset. */
|
||||
if (!APPLY_CONVERSION (cvt, base, p - base, &tbuf))
|
||||
goto fail;
|
||||
if (to)
|
||||
if (!APPLY_CONVERSION (cvt, base, p - base, &tbuf))
|
||||
goto fail;
|
||||
/* Similar to above: assumes we have a 1:1 correspondence
|
||||
between bytes in the source encoding and bytes in the
|
||||
execution encoding. */
|
||||
if (loc_reader)
|
||||
{
|
||||
gcc_assert (cvt.func == convert_no_conversion);
|
||||
out->add_n_ranges (p - base, *loc_reader);
|
||||
}
|
||||
}
|
||||
if (p == limit)
|
||||
if (p >= limit)
|
||||
break;
|
||||
|
||||
p = convert_escape (pfile, p + 1, limit, &tbuf, cvt);
|
||||
struct _cpp_strbuf *tbuf_ptr = to ? &tbuf : NULL;
|
||||
p = convert_escape (pfile, p + 1, limit, tbuf_ptr, cvt,
|
||||
loc_reader, out);
|
||||
}
|
||||
}
|
||||
/* NUL-terminate the 'to' buffer and translate it to a cpp_string
|
||||
structure. */
|
||||
emit_numeric_escape (pfile, 0, &tbuf, cvt);
|
||||
tbuf.text = XRESIZEVEC (uchar, tbuf.text, tbuf.len);
|
||||
to->text = tbuf.text;
|
||||
to->len = tbuf.len;
|
||||
|
||||
if (to)
|
||||
{
|
||||
/* NUL-terminate the 'to' buffer and translate it to a cpp_string
|
||||
structure. */
|
||||
emit_numeric_escape (pfile, 0, &tbuf, cvt);
|
||||
tbuf.text = XRESIZEVEC (uchar, tbuf.text, tbuf.len);
|
||||
to->text = tbuf.text;
|
||||
to->len = tbuf.len;
|
||||
}
|
||||
|
||||
return true;
|
||||
|
||||
fail:
|
||||
cpp_errno (pfile, CPP_DL_ERROR, "converting to execution character set");
|
||||
free (tbuf.text);
|
||||
if (to)
|
||||
free (tbuf.text);
|
||||
return false;
|
||||
}
|
||||
|
||||
/* FROM is an array of cpp_string structures of length COUNT. These
|
||||
are to be converted from the source to the execution character set,
|
||||
escape sequences translated, and finally all are to be
|
||||
concatenated. WIDE indicates whether or not to produce a wide
|
||||
string. The result is written into TO. Returns true for success,
|
||||
false for failure. */
|
||||
bool
|
||||
cpp_interpret_string (cpp_reader *pfile, const cpp_string *from, size_t count,
|
||||
cpp_string *to, enum cpp_ttype type)
|
||||
{
|
||||
return cpp_interpret_string_1 (pfile, from, count, to, type, NULL, NULL);
|
||||
}
|
||||
|
||||
/* A "do nothing" error-handling callback for use by
|
||||
cpp_interpret_string_ranges, so that it can temporarily suppress
|
||||
error-handling. */
|
||||
|
||||
static bool
|
||||
noop_error_cb (cpp_reader *, int, int, rich_location *,
|
||||
const char *, va_list *)
|
||||
{
|
||||
/* no-op. */
|
||||
return true;
|
||||
}
|
||||
|
||||
/* This function mimics the behavior of cpp_interpret_string, but
|
||||
rather than generating a string in the execution character set,
|
||||
*OUT is written to with the source code ranges of the characters
|
||||
in such a string.
|
||||
FROM and LOC_READERS should both be arrays of length COUNT.
|
||||
Returns NULL for success, or an error message for failure. */
|
||||
|
||||
const char *
|
||||
cpp_interpret_string_ranges (cpp_reader *pfile, const cpp_string *from,
|
||||
cpp_string_location_reader *loc_readers,
|
||||
size_t count,
|
||||
cpp_substring_ranges *out,
|
||||
enum cpp_ttype type)
|
||||
{
|
||||
/* There are a couple of cases in the range-handling in
|
||||
cpp_interpret_string_1 that rely on there being a 1:1 correspondence
|
||||
between bytes in the source encoding and bytes in the execution
|
||||
encoding, so that each byte in the execution string can correspond
|
||||
to the location of a byte in the source string.
|
||||
|
||||
This holds for the typical case of a UTF-8 to UTF-8 conversion.
|
||||
Enforce this requirement by only attempting to track substring
|
||||
locations if we have source encoding == execution encoding.
|
||||
|
||||
This is a stronger condition than we need, since we could e.g.
|
||||
have ASCII to EBCDIC (with 1 byte per character before and after),
|
||||
but it seems to be a reasonable restriction. */
|
||||
struct cset_converter cvt = converter_for_type (pfile, type);
|
||||
if (cvt.func != convert_no_conversion)
|
||||
return "execution character set != source character set";
|
||||
|
||||
/* For on-demand strings we have already lexed the strings, so there
|
||||
should be no errors. However, if we have bogus source location
|
||||
data (or stringified macro arguments), the attempt to lex the
|
||||
strings could fail with an error. Temporarily install an
|
||||
error-handler to catch the error, so that it can lead to this call
|
||||
failing, rather than being emitted as a user-visible diagnostic.
|
||||
If an error does occur, we should see it via the return value of
|
||||
cpp_interpret_string_1. */
|
||||
bool (*saved_error_handler) (cpp_reader *, int, int, rich_location *,
|
||||
const char *, va_list *)
|
||||
ATTRIBUTE_FPTR_PRINTF(5,0);
|
||||
|
||||
saved_error_handler = pfile->cb.error;
|
||||
pfile->cb.error = noop_error_cb;
|
||||
|
||||
bool result = cpp_interpret_string_1 (pfile, from, count, NULL, type,
|
||||
loc_readers, out);
|
||||
|
||||
/* Restore the saved error-handler. */
|
||||
pfile->cb.error = saved_error_handler;
|
||||
|
||||
if (!result)
|
||||
return "cpp_interpret_string_1 failed";
|
||||
|
||||
/* Success. */
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Subroutine of do_line and do_linemarker. Convert escape sequences
|
||||
in a string, but do not perform character set conversion. */
|
||||
bool
|
||||
@ -1818,3 +2110,39 @@ _cpp_default_encoding (void)
|
||||
|
||||
return current_encoding;
|
||||
}
|
||||
|
||||
/* Implementation of class cpp_string_location_reader. */
|
||||
|
||||
/* Constructor for cpp_string_location_reader. */
|
||||
|
||||
cpp_string_location_reader::
|
||||
cpp_string_location_reader (source_location src_loc,
|
||||
line_maps *line_table)
|
||||
: m_line_table (line_table)
|
||||
{
|
||||
src_loc = get_range_from_loc (line_table, src_loc).m_start;
|
||||
|
||||
/* SRC_LOC might be a macro location. It only makes sense to do
|
||||
column-by-column calculations on ordinary maps, so get the
|
||||
corresponding location in an ordinary map. */
|
||||
m_loc
|
||||
= linemap_resolve_location (line_table, src_loc,
|
||||
LRK_SPELLING_LOCATION, NULL);
|
||||
|
||||
const line_map_ordinary *map
|
||||
= linemap_check_ordinary (linemap_lookup (line_table, m_loc));
|
||||
m_offset_per_column = (1 << map->m_range_bits);
|
||||
}
|
||||
|
||||
/* Get the range of the next source byte. */
|
||||
|
||||
source_range
|
||||
cpp_string_location_reader::get_next ()
|
||||
{
|
||||
source_range result;
|
||||
result.m_start = m_loc;
|
||||
result.m_finish = m_loc;
|
||||
if (m_loc <= LINE_MAP_MAX_LOCATION_WITH_COLS)
|
||||
m_loc += m_offset_per_column;
|
||||
return result;
|
||||
}
|
||||
|
@ -743,6 +743,51 @@ struct GTY(()) cpp_hashnode {
|
||||
union _cpp_hashnode_value GTY ((desc ("CPP_HASHNODE_VALUE_IDX (%1)"))) value;
|
||||
};
|
||||
|
||||
/* A class for iterating through the source locations within a
|
||||
string token (before escapes are interpreted, and before
|
||||
concatenation). */
|
||||
|
||||
class cpp_string_location_reader {
|
||||
public:
|
||||
cpp_string_location_reader (source_location src_loc,
|
||||
line_maps *line_table);
|
||||
|
||||
source_range get_next ();
|
||||
|
||||
private:
|
||||
source_location m_loc;
|
||||
int m_offset_per_column;
|
||||
line_maps *m_line_table;
|
||||
};
|
||||
|
||||
/* A class for storing the source ranges of all of the characters within
|
||||
a string literal, after escapes are interpreted, and after
|
||||
concatenation.
|
||||
|
||||
This is not GTY-marked, as instances are intended to be temporary. */
|
||||
|
||||
class cpp_substring_ranges
|
||||
{
|
||||
public:
|
||||
cpp_substring_ranges ();
|
||||
~cpp_substring_ranges ();
|
||||
|
||||
int get_num_ranges () const { return m_num_ranges; }
|
||||
source_range get_range (int idx) const
|
||||
{
|
||||
linemap_assert (idx < m_num_ranges);
|
||||
return m_ranges[idx];
|
||||
}
|
||||
|
||||
void add_range (source_range range);
|
||||
void add_n_ranges (int num, cpp_string_location_reader &loc_reader);
|
||||
|
||||
private:
|
||||
source_range *m_ranges;
|
||||
int m_num_ranges;
|
||||
int m_alloc_ranges;
|
||||
};
|
||||
|
||||
/* Call this first to get a handle to pass to other functions.
|
||||
|
||||
If you want cpplib to manage its own hashtable, pass in a NULL
|
||||
@ -829,6 +874,12 @@ extern cppchar_t cpp_interpret_charconst (cpp_reader *, const cpp_token *,
|
||||
extern bool cpp_interpret_string (cpp_reader *,
|
||||
const cpp_string *, size_t,
|
||||
cpp_string *, enum cpp_ttype);
|
||||
extern const char *cpp_interpret_string_ranges (cpp_reader *pfile,
|
||||
const cpp_string *from,
|
||||
cpp_string_location_reader *,
|
||||
size_t count,
|
||||
cpp_substring_ranges *out,
|
||||
enum cpp_ttype type);
|
||||
extern bool cpp_interpret_string_notranslate (cpp_reader *,
|
||||
const cpp_string *, size_t,
|
||||
cpp_string *, enum cpp_ttype);
|
||||
|
@ -754,7 +754,9 @@ struct normalize_state
|
||||
extern bool _cpp_valid_ucn (cpp_reader *, const unsigned char **,
|
||||
const unsigned char *, int,
|
||||
struct normalize_state *state,
|
||||
cppchar_t *);
|
||||
cppchar_t *,
|
||||
source_range *char_range,
|
||||
cpp_string_location_reader *loc_reader);
|
||||
extern void _cpp_destroy_iconv (cpp_reader *);
|
||||
extern unsigned char *_cpp_convert_input (cpp_reader *, const char *,
|
||||
unsigned char *, size_t, size_t,
|
||||
|
@ -1247,7 +1247,7 @@ forms_identifier_p (cpp_reader *pfile, int first,
|
||||
cppchar_t s;
|
||||
buffer->cur += 2;
|
||||
if (_cpp_valid_ucn (pfile, &buffer->cur, buffer->rlimit, 1 + !first,
|
||||
state, &s))
|
||||
state, &s, NULL, NULL))
|
||||
return true;
|
||||
buffer->cur -= 2;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user