2
0
mirror of git://gcc.gnu.org/git/gcc.git synced 2025-04-18 04:00:24 +08:00

re PR preprocessor/78680 (ICE in get_substring_ranges_for_loc, at input.c:1398)

Fix for PR preprocessor/78680

PR preprocessor/78680 identifies a crash when attempting to issue
a -Wformat warning, where the format string includes a string token
split across multiple physical source lines via backslash-continued
lines.

The issue is that libcpp is generating bogus range information for
such tokens.

For example, in:

void fn1() {
  __builtin_printf("\
     %ld.\n\
        2\n"); };

the range of the string token is printed as:

   __builtin_printf("\
                    ^~

whereas the range ought to be:

  __builtin_printf("\
                   ^~
     %ld.\n\
     ~~~~~~~
        2\n"); };
        ~~~~

The root cause is that the line notes expressing the update
of the buffer in lex.c haven't yet been processed when the end-point
of the token is computed:

3095	    tok_range.m_finish
3096	      = linemap_position_for_column (pfile->line_table,
3097					     CPP_BUF_COLUMN (buffer, buffer->cur));

so that the physical line is still regarded as that of the start
of the token, and, where CPP_BUF_COLUMN uses (BUF)->line_base,
line_base is still the location of the first physical line in the
logical line, and hence the column information is too large (as if
it were the offset in the *logical* line).

(the printed range is somewhat misleading; the actual buggy range
extends beyond the "\ in the line, but within diagnostic-show-locus.c
layout::print_annotation_line only prints up to the xbound set by
layout::print_source_line and so truncates most of the buggy range).

The fix is to ensure that line notes are handled before calculating
the end-point of the token range.

This leads to the range for the string token being correctly
computed, as:

  __builtin_printf("\
                   ^~
     %ld.\n\
     ~~~~~~~
        2\n"); };
        ~~~~

and this leads to get_substring_ranges_for_loc failing gracefully,
rather than crashing.

gcc/testsuite/ChangeLog:
	PR preprocessor/78680
	* gcc.dg/format/pr78680.c: New test case.
	* gcc.dg/plugin/diagnostic-test-expressions-1.c
	(test_multiline_token): New function.
	* gcc.dg/plugin/diagnostic-test-string-literals-1.c
	(test_backslash_continued_logical_lines): New function.

libcpp/ChangeLog:
	PR preprocessor/78680
	* lex.c (_cpp_lex_direct): Ensure line notes are processed before
	computing the end-point of the token.

From-SVN: r243567
This commit is contained in:
David Malcolm 2016-12-12 17:37:48 +00:00 committed by David Malcolm
parent 8e09a726e6
commit 470a60b2c4
6 changed files with 77 additions and 0 deletions

@ -1,3 +1,12 @@
2016-12-12 David Malcolm <dmalcolm@redhat.com>
PR preprocessor/78680
* gcc.dg/format/pr78680.c: New test case.
* gcc.dg/plugin/diagnostic-test-expressions-1.c
(test_multiline_token): New function.
* gcc.dg/plugin/diagnostic-test-string-literals-1.c
(test_backslash_continued_logical_lines): New function.
2016-12-12 Nathan Sidwell <nathan@acm.org>
PR c++/78252

@ -0,0 +1,16 @@
/* { dg-do compile } */
/* { dg-options "-O2 -Wall -Wextra -fdiagnostics-show-caret" } */
void fn1() {
__builtin_printf("\
%ld.\n\
2\n"); };
/* { dg-warning "expects a matching" "" { target *-*-* } .-3 } */
/* { dg-begin-multiline-output "" }
__builtin_printf("\
^~
%ld.\n\
~~~~~~~
2\n"); };
~~~~
{ dg-end-multiline-output "" } */

@ -689,3 +689,22 @@ void test_multiple_ordinary_maps (void)
~~
{ dg-end-multiline-output "" } */
}
/* Verify that we correctly handle a token that spans multiple
physical lines. */
const char *test_multiline_token (void)
{
__emit_expression_range (0, "foo\
bar\
baz");
/* { dg-warning "range" "" { target *-*-* } .-3 } */
/* { dg-begin-multiline-output "" }
__emit_expression_range (0, "foo\
^~~~~
bar\
~~~~
baz");
~~~~
{ dg-end-multiline-output "" } */
}

@ -272,3 +272,23 @@ test_terminator_location (void)
^
{ dg-end-multiline-output "" } */
}
/* Verify that we fail gracefully when a string literal token is split
across multiple physical lines. */
void
test_backslash_continued_logical_lines (void)
{
__emit_string_literal_range ("\
01234\
56789", 6, 6, 7);
/* { dg-error "unable to read substring location: range endpoints are on different lines" "" { target *-*-* } .-3 } */
/* { dg-begin-multiline-output "" }
__emit_string_literal_range ("\
^~
01234\
~~~~~~
56789", 6, 6, 7);
~~~~~~
{ dg-end-multiline-output "" } */
}

@ -1,3 +1,9 @@
2016-12-12 David Malcolm <dmalcolm@redhat.com>
PR preprocessor/78680
* lex.c (_cpp_lex_direct): Ensure line notes are processed before
computing the end-point of the token.
2016-11-23 Paolo Bonzini <bonzini@gnu.org>
* include/cpplib.h (struct cpp_options): Add new member

@ -3089,6 +3089,13 @@ _cpp_lex_direct (cpp_reader *pfile)
break;
}
/* Ensure that any line notes are processed, so that we have the
correct physical line/column for the end-point of the token even
when a logical line is split via one or more backslashes. */
if (buffer->cur >= buffer->notes[buffer->cur_note].pos
&& !pfile->overlaid_buffer)
_cpp_process_line_notes (pfile, false);
source_range tok_range;
tok_range.m_start = result->src_loc;
if (result->src_loc >= RESERVED_LOCATION_COUNT)