mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-04-16 19:51:23 +08:00
Fix overeager spelling corrections (PR c/82967)
This patch tunes class best_match's cutoff for rejecting meaningless spelling suggestions. Previously, we allowed an edit distance of up to half of the length of the longer of the goal string and closest candidate strings, rounded down. With this patch, we now allow only up to a third - with some tuning of rounding (and for very short strings), to ensure that: (a) everything that worked before still works (with the removal of a couple of cases that shouldn't), and that (b) the new threshold is always at least as conservative as the old threshold and thus shouldn't offer new nonsensical suggestions (with the possible exception of cases where transposition has helped; see r261521 aka Damerau-Levenshtein; PR other/69968). In particular, all of the bogus suggestions from PR c/82967 are now no longer offered. gcc/ChangeLog: PR c/82967 * spellcheck.c (get_edit_distance_cutoff): New function. (selftest::test_edit_distance_unit_test_oneway): Rename to... (selftest::test_get_edit_distance_one_way): ...this. (selftest::test_get_edit_distance_unit): Rename to... (selftest::test_get_edit_distance_both_ways): ...this. (selftest::test_edit_distances): Move tests to this new function, and test some more pairs of strings. Update for above renaming. (selftest::get_old_cutoff): New function. (selftest::test_get_edit_distance_cutoff): New function. (selftest::assert_suggested_for): New function. (ASSERT_SUGGESTED_FOR): New macro. (selftest::assert_not_suggested_for): New function. (ASSERT_NOT_SUGGESTED_FOR): New macro. (selftest::test_suggestions): New function. (selftest::spellcheck_c_tests): Move test_get_edit_distance_unit tests to selftest::test_edit_distances and call it. Add calls to selftest::test_get_edit_distance_cutoff and selftest::test_suggestions. * spellcheck.h (get_edit_distance_cutoff): New function declaration. (best_match::consider): Replace hard-coded cutoff calculation with a call to... (best_match::get_cutoff): New declaration. (best_match::get_best_meaningful_candidate): Likewise. gcc/testsuite/ChangeLog: PR c/82967 * c-c++-common/attributes-1.c: Remove bogus suggestion from dg-prune-output. * gcc.dg/diagnostic-token-ranges.c (undeclared_identifier): Remove bogus suggestion. * gcc.dg/spellcheck-identifiers-4.c: New test. From-SVN: r264335
This commit is contained in:
parent
2870a4da4f
commit
07f8790583
@ -1,3 +1,30 @@
|
||||
2018-09-14 David Malcolm <dmalcolm@redhat.com>
|
||||
|
||||
PR c/82967
|
||||
* spellcheck.c (get_edit_distance_cutoff): New function.
|
||||
(selftest::test_edit_distance_unit_test_oneway): Rename to...
|
||||
(selftest::test_get_edit_distance_one_way): ...this.
|
||||
(selftest::test_get_edit_distance_unit): Rename to...
|
||||
(selftest::test_get_edit_distance_both_ways): ...this.
|
||||
(selftest::test_edit_distances): Move tests to this new function,
|
||||
and test some more pairs of strings. Update for above renaming.
|
||||
(selftest::get_old_cutoff): New function.
|
||||
(selftest::test_get_edit_distance_cutoff): New function.
|
||||
(selftest::assert_suggested_for): New function.
|
||||
(ASSERT_SUGGESTED_FOR): New macro.
|
||||
(selftest::assert_not_suggested_for): New function.
|
||||
(ASSERT_NOT_SUGGESTED_FOR): New macro.
|
||||
(selftest::test_suggestions): New function.
|
||||
(selftest::spellcheck_c_tests): Move test_get_edit_distance_unit
|
||||
tests to selftest::test_edit_distances and call it. Add calls to
|
||||
selftest::test_get_edit_distance_cutoff and
|
||||
selftest::test_suggestions.
|
||||
* spellcheck.h (get_edit_distance_cutoff): New function declaration.
|
||||
(best_match::consider): Replace hard-coded cutoff calculation with
|
||||
a call to...
|
||||
(best_match::get_cutoff): New declaration.
|
||||
(best_match::get_best_meaningful_candidate): Likewise.
|
||||
|
||||
2018-09-14 Bernd Edlinger <bernd.edlinger@hotmail.de>
|
||||
|
||||
* builtins.c (fold_builtin_strlen): Remove TODO comment.
|
||||
|
231
gcc/spellcheck.c
231
gcc/spellcheck.c
@ -162,6 +162,36 @@ find_closest_string (const char *target,
|
||||
return bm.get_best_meaningful_candidate ();
|
||||
}
|
||||
|
||||
/* Generate the maximum edit distance for which we consider a suggestion
|
||||
to be meaningful, given a goal of length GOAL_LEN and a candidate of
|
||||
length CANDIDATE_LEN.
|
||||
|
||||
This is a third of the the length of the candidate or of the goal,
|
||||
whichever is bigger. */
|
||||
|
||||
edit_distance_t
|
||||
get_edit_distance_cutoff (size_t goal_len, size_t candidate_len)
|
||||
{
|
||||
size_t max_length = MAX (goal_len, candidate_len);
|
||||
size_t min_length = MIN (goal_len, candidate_len);
|
||||
|
||||
gcc_assert (max_length >= min_length);
|
||||
|
||||
/* Special case: don't offer suggestions for a pair of
|
||||
length == 1 strings (or empty strings). */
|
||||
if (max_length <= 1)
|
||||
return 0;
|
||||
|
||||
/* If the lengths are close, then round down. */
|
||||
if (max_length - min_length <= 1)
|
||||
/* ...but allow an edit distance of at least 1. */
|
||||
return MAX (max_length / 3, 1);
|
||||
|
||||
/* Otherwise, round up (thus giving a little extra leeway to some cases
|
||||
involving insertions/deletions). */
|
||||
return (max_length + 2) / 3;
|
||||
}
|
||||
|
||||
#if CHECKING_P
|
||||
|
||||
namespace selftest {
|
||||
@ -171,8 +201,8 @@ namespace selftest {
|
||||
/* Verify that get_edit_distance (A, B) equals the expected value. */
|
||||
|
||||
static void
|
||||
test_edit_distance_unit_test_oneway (const char *a, const char *b,
|
||||
edit_distance_t expected)
|
||||
test_get_edit_distance_one_way (const char *a, const char *b,
|
||||
edit_distance_t expected)
|
||||
{
|
||||
edit_distance_t actual = get_edit_distance (a, b);
|
||||
ASSERT_EQ (actual, expected);
|
||||
@ -185,11 +215,169 @@ test_edit_distance_unit_test_oneway (const char *a, const char *b,
|
||||
equal the expected value, to ensure that the function is symmetric. */
|
||||
|
||||
static void
|
||||
test_get_edit_distance_unit (const char *a, const char *b,
|
||||
test_get_edit_distance_both_ways (const char *a, const char *b,
|
||||
edit_distance_t expected)
|
||||
{
|
||||
test_edit_distance_unit_test_oneway (a, b, expected);
|
||||
test_edit_distance_unit_test_oneway (b, a, expected);
|
||||
test_get_edit_distance_one_way (a, b, expected);
|
||||
test_get_edit_distance_one_way (b, a, expected);
|
||||
}
|
||||
|
||||
/* Verify get_edit_distance for a variety of pairs of pre-canned
|
||||
inputs, comparing against known-good values. */
|
||||
|
||||
static void
|
||||
test_edit_distances ()
|
||||
{
|
||||
test_get_edit_distance_both_ways ("", "nonempty", strlen ("nonempty"));
|
||||
test_get_edit_distance_both_ways ("saturday", "sunday", 3);
|
||||
test_get_edit_distance_both_ways ("foo", "m_foo", 2);
|
||||
test_get_edit_distance_both_ways ("hello_world", "HelloWorld", 3);
|
||||
test_get_edit_distance_both_ways
|
||||
("the quick brown fox jumps over the lazy dog", "dog", 40);
|
||||
test_get_edit_distance_both_ways
|
||||
("the quick brown fox jumps over the lazy dog",
|
||||
"the quick brown dog jumps over the lazy fox",
|
||||
4);
|
||||
test_get_edit_distance_both_ways
|
||||
("Lorem ipsum dolor sit amet, consectetur adipiscing elit,",
|
||||
"All your base are belong to us",
|
||||
44);
|
||||
test_get_edit_distance_both_ways ("foo", "FOO", 3);
|
||||
test_get_edit_distance_both_ways ("fee", "deed", 2);
|
||||
test_get_edit_distance_both_ways ("coorzd1", "coordx1", 2);
|
||||
test_get_edit_distance_both_ways ("assert", "sqrt", 3);
|
||||
test_get_edit_distance_both_ways ("PATH_MAX", "INT8_MAX", 3);
|
||||
test_get_edit_distance_both_ways ("time", "nice", 2);
|
||||
test_get_edit_distance_both_ways ("bar", "carg", 2);
|
||||
test_get_edit_distance_both_ways ("gtk_widget_show_all",
|
||||
"GtkWidgetShowAll", 7);
|
||||
test_get_edit_distance_both_ways ("m_bar", "bar", 2);
|
||||
test_get_edit_distance_both_ways ("MACRO", "MACRAME", 3);
|
||||
test_get_edit_distance_both_ways ("ab", "ac", 1);
|
||||
test_get_edit_distance_both_ways ("ab", "a", 1);
|
||||
test_get_edit_distance_both_ways ("a", "b", 1);
|
||||
test_get_edit_distance_both_ways ("nanl", "name", 2);
|
||||
test_get_edit_distance_both_ways ("char", "bar", 2);
|
||||
test_get_edit_distance_both_ways ("-optimize", "fsanitize", 5);
|
||||
test_get_edit_distance_both_ways ("__DATE__", "__i386__", 4);
|
||||
|
||||
/* Examples where transposition helps. */
|
||||
test_get_edit_distance_both_ways ("ab", "ba", 1);
|
||||
test_get_edit_distance_both_ways ("ba", "abc", 2);
|
||||
test_get_edit_distance_both_ways ("coorzd1", "coordz1", 1);
|
||||
test_get_edit_distance_both_ways ("abcdefghijklmnopqrstuvwxyz",
|
||||
"bacdefghijklmnopqrstuvwxzy", 2);
|
||||
test_get_edit_distance_both_ways ("saturday", "sundya", 4);
|
||||
test_get_edit_distance_both_ways ("signed", "singed", 1);
|
||||
}
|
||||
|
||||
/* Subroutine of test_get_edit_distance_cutoff, for emulating the
|
||||
spellchecking cutoff in up to GCC 8. */
|
||||
|
||||
static edit_distance_t
|
||||
get_old_cutoff (size_t goal_len, size_t candidate_len)
|
||||
{
|
||||
return MAX (goal_len, candidate_len) / 2;
|
||||
}
|
||||
|
||||
/* Verify that the cutoff for "meaningfulness" of suggestions is at least as
|
||||
conservative as in older GCC releases.
|
||||
|
||||
This should ensure that we don't offer additional meaningless
|
||||
suggestions (apart from those for which transposition has helped). */
|
||||
|
||||
static void
|
||||
test_get_edit_distance_cutoff ()
|
||||
{
|
||||
for (size_t goal_len = 0; goal_len < 30; goal_len++)
|
||||
for (size_t candidate_len = 0; candidate_len < 30; candidate_len++)
|
||||
ASSERT_TRUE (get_edit_distance_cutoff (goal_len, candidate_len)
|
||||
<= get_old_cutoff (goal_len, candidate_len));
|
||||
}
|
||||
|
||||
/* Assert that CANDIDATE is offered as a suggestion for TARGET. */
|
||||
|
||||
static void
|
||||
assert_suggested_for (const location &loc, const char *candidate,
|
||||
const char *target)
|
||||
{
|
||||
auto_vec<const char *> candidates;
|
||||
candidates.safe_push (candidate);
|
||||
ASSERT_EQ_AT (loc, candidate, find_closest_string (target, &candidates));
|
||||
}
|
||||
|
||||
/* Assert that CANDIDATE is offered as a suggestion for TARGET. */
|
||||
|
||||
#define ASSERT_SUGGESTED_FOR(CANDIDATE, TARGET) \
|
||||
SELFTEST_BEGIN_STMT \
|
||||
assert_suggested_for (SELFTEST_LOCATION, CANDIDATE, TARGET); \
|
||||
SELFTEST_END_STMT
|
||||
|
||||
/* Assert that CANDIDATE is not offered as a suggestion for TARGET. */
|
||||
|
||||
static void
|
||||
assert_not_suggested_for (const location &loc, const char *candidate,
|
||||
const char *target)
|
||||
{
|
||||
auto_vec<const char *> candidates;
|
||||
candidates.safe_push (candidate);
|
||||
ASSERT_EQ_AT (loc, NULL, find_closest_string (target, &candidates));
|
||||
}
|
||||
|
||||
/* Assert that CANDIDATE is not offered as a suggestion for TARGET. */
|
||||
|
||||
#define ASSERT_NOT_SUGGESTED_FOR(CANDIDATE, TARGET) \
|
||||
SELFTEST_BEGIN_STMT \
|
||||
assert_not_suggested_for (SELFTEST_LOCATION, CANDIDATE, TARGET); \
|
||||
SELFTEST_END_STMT
|
||||
|
||||
/* Verify that we offer varous suggestions that are meaningful,
|
||||
and that we don't offer various other ones that aren't (PR c/82967). */
|
||||
|
||||
static void
|
||||
test_suggestions ()
|
||||
{
|
||||
/* Good suggestions. */
|
||||
|
||||
ASSERT_SUGGESTED_FOR ("m_bar", "bar");
|
||||
// dist == 2, max_length == 5, min_length == 3
|
||||
|
||||
ASSERT_SUGGESTED_FOR ("MACRO", "MACRAME");
|
||||
// dist == 3, max_length == 7, min_length == 5
|
||||
|
||||
ASSERT_SUGGESTED_FOR ("gtk_widget_show_all", "GtkWidgetShowAll");
|
||||
// dist == 7, max_length == 16, min_length = 19
|
||||
|
||||
ASSERT_SUGGESTED_FOR ("ab", "ac");
|
||||
// dist == 1, max_length == min_length = 2
|
||||
|
||||
ASSERT_SUGGESTED_FOR ("ab", "a");
|
||||
// dist == 1, max_length == 2, min_length = 1
|
||||
|
||||
/* Bad suggestions. */
|
||||
|
||||
ASSERT_NOT_SUGGESTED_FOR ("a", "b");
|
||||
// dist == 1, max_length == min_length = 1
|
||||
|
||||
ASSERT_NOT_SUGGESTED_FOR ("sqrt", "assert");
|
||||
// dist == 3, max_length 6, min_length == 4
|
||||
|
||||
ASSERT_NOT_SUGGESTED_FOR ("INT8_MAX", "PATH_MAX");
|
||||
// dist == 3, max_length == min_length == 8
|
||||
|
||||
ASSERT_NOT_SUGGESTED_FOR ("nice", "time");
|
||||
ASSERT_NOT_SUGGESTED_FOR ("nanl", "name");
|
||||
// dist == 2, max_length == min_length == 4
|
||||
|
||||
ASSERT_NOT_SUGGESTED_FOR ("carg", "bar");
|
||||
ASSERT_NOT_SUGGESTED_FOR ("char", "bar");
|
||||
// dist == 2, max_length == 4, min_length == 3
|
||||
|
||||
ASSERT_NOT_SUGGESTED_FOR ("-optimize", "fsanitize");
|
||||
// dist == 5, max_length == min_length == 9
|
||||
|
||||
ASSERT_NOT_SUGGESTED_FOR ("__DATE__", "__i386__");
|
||||
// dist == 4, max_length == min_length == 8
|
||||
}
|
||||
|
||||
/* Verify that find_closest_string is sane. */
|
||||
@ -291,39 +479,14 @@ test_metric_conditions ()
|
||||
}
|
||||
}
|
||||
|
||||
/* Verify get_edit_distance for a variety of pairs of pre-canned
|
||||
inputs, comparing against known-good values. */
|
||||
/* Run all of the selftests within this file. */
|
||||
|
||||
void
|
||||
spellcheck_c_tests ()
|
||||
{
|
||||
test_get_edit_distance_unit ("", "nonempty", strlen ("nonempty"));
|
||||
test_get_edit_distance_unit ("saturday", "sunday", 3);
|
||||
test_get_edit_distance_unit ("foo", "m_foo", 2);
|
||||
test_get_edit_distance_unit ("hello_world", "HelloWorld", 3);
|
||||
test_get_edit_distance_unit
|
||||
("the quick brown fox jumps over the lazy dog", "dog", 40);
|
||||
test_get_edit_distance_unit
|
||||
("the quick brown fox jumps over the lazy dog",
|
||||
"the quick brown dog jumps over the lazy fox",
|
||||
4);
|
||||
test_get_edit_distance_unit
|
||||
("Lorem ipsum dolor sit amet, consectetur adipiscing elit,",
|
||||
"All your base are belong to us",
|
||||
44);
|
||||
test_get_edit_distance_unit ("foo", "FOO", 3);
|
||||
test_get_edit_distance_unit ("fee", "deed", 2);
|
||||
test_get_edit_distance_unit ("coorzd1", "coordx1", 2);
|
||||
|
||||
/* Examples where transposition helps. */
|
||||
test_get_edit_distance_unit ("ab", "ba", 1);
|
||||
test_get_edit_distance_unit ("ba", "abc", 2);
|
||||
test_get_edit_distance_unit ("coorzd1", "coordz1", 1);
|
||||
test_get_edit_distance_unit ("abcdefghijklmnopqrstuvwxyz",
|
||||
"bacdefghijklmnopqrstuvwxzy", 2);
|
||||
test_get_edit_distance_unit ("saturday", "sundya", 4);
|
||||
test_get_edit_distance_unit ("signed", "singed", 1);
|
||||
|
||||
test_edit_distances ();
|
||||
test_get_edit_distance_cutoff ();
|
||||
test_suggestions ();
|
||||
test_find_closest_string ();
|
||||
test_metric_conditions ();
|
||||
}
|
||||
|
@ -66,6 +66,9 @@ struct edit_distance_traits<const char *>
|
||||
}
|
||||
};
|
||||
|
||||
extern edit_distance_t get_edit_distance_cutoff (size_t goal_len,
|
||||
size_t candidate_len);
|
||||
|
||||
/* A type for use when determining the best match against a string,
|
||||
expressed as a template so that we can match against various
|
||||
string-like types (const char *, frontend identifiers, and preprocessor
|
||||
@ -119,7 +122,7 @@ class best_match
|
||||
/* If the candidate will be unable to beat the criterion in
|
||||
get_best_meaningful_candidate, reject it without computing
|
||||
the exact distance. */
|
||||
unsigned int cutoff = MAX (m_goal_len, candidate_len) / 2;
|
||||
edit_distance_t cutoff = get_cutoff (candidate_len);
|
||||
if (min_candidate_distance > cutoff)
|
||||
return;
|
||||
|
||||
@ -151,17 +154,25 @@ class best_match
|
||||
m_best_candidate_len = best_candidate_len;
|
||||
}
|
||||
|
||||
/* Generate the maximum edit distance for which we consider a suggestion
|
||||
to be meaningful, given a candidate of length CANDIDATE_LEN. */
|
||||
|
||||
edit_distance_t get_cutoff (size_t candidate_len) const
|
||||
{
|
||||
return ::get_edit_distance_cutoff (m_goal_len, candidate_len);
|
||||
}
|
||||
|
||||
/* Get the best candidate so far, but applying a filter to ensure
|
||||
that we return NULL if none of the candidates are close to the goal,
|
||||
to avoid offering nonsensical suggestions to the user. */
|
||||
|
||||
candidate_t get_best_meaningful_candidate () const
|
||||
{
|
||||
/* If more than half of the letters were misspelled, the suggestion is
|
||||
likely to be meaningless. */
|
||||
/* If the edit distance is too high, the suggestion is likely to be
|
||||
meaningless. */
|
||||
if (m_best_candidate)
|
||||
{
|
||||
unsigned int cutoff = MAX (m_goal_len, m_best_candidate_len) / 2;
|
||||
edit_distance_t cutoff = get_cutoff (m_best_candidate_len);
|
||||
if (m_best_distance > cutoff)
|
||||
return NULL;
|
||||
}
|
||||
|
@ -1,3 +1,12 @@
|
||||
2018-09-14 David Malcolm <dmalcolm@redhat.com>
|
||||
|
||||
PR c/82967
|
||||
* c-c++-common/attributes-1.c: Remove bogus suggestion from
|
||||
dg-prune-output.
|
||||
* gcc.dg/diagnostic-token-ranges.c (undeclared_identifier): Remove
|
||||
bogus suggestion.
|
||||
* gcc.dg/spellcheck-identifiers-4.c: New test.
|
||||
|
||||
2018-09-14 Bernd Edlinger <bernd.edlinger@hotmail.de>
|
||||
|
||||
* gcc.dg/warn-strlen-no-nul.c: Add some missing test cases.
|
||||
|
@ -1,5 +1,5 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-prune-output "undeclared here \\(not in a function\\); did you mean .char..|\[^\n\r\]* was not declared in this scope" } */
|
||||
/* { dg-prune-output "undeclared here \\(not in a function\\)|\[^\n\r\]* was not declared in this scope" } */
|
||||
|
||||
void* my_calloc(unsigned, unsigned) __attribute__((alloc_size(1,bar))); /* { dg-warning "outside range" } */
|
||||
void* my_realloc(void*, unsigned) __attribute__((alloc_size(bar))); /* { dg-warning "outside range" } */
|
||||
|
@ -8,12 +8,11 @@ long double nanl (const char *);
|
||||
|
||||
void undeclared_identifier (void)
|
||||
{
|
||||
name; /* { dg-error "'name' undeclared .first use in this function.; did you mean .nanl." } */
|
||||
name; /* { dg-error "'name' undeclared" } */
|
||||
/*
|
||||
{ dg-begin-multiline-output "" }
|
||||
name;
|
||||
^~~~
|
||||
nanl
|
||||
{ dg-end-multiline-output "" }
|
||||
*/
|
||||
}
|
||||
|
10
gcc/testsuite/gcc.dg/spellcheck-identifiers-4.c
Normal file
10
gcc/testsuite/gcc.dg/spellcheck-identifiers-4.c
Normal file
@ -0,0 +1,10 @@
|
||||
/* { dg-options "-Wimplicit-function-declaration" } */
|
||||
|
||||
extern double sqrt (double);
|
||||
|
||||
void test (float pf, float inff)
|
||||
{
|
||||
assert (pf == inff); /* { dg-bogus "sqrt" } */
|
||||
/* { dg-warning "implicit declaration of function 'assert'" "" { target *-*-* } .-1 } */
|
||||
/* { dg-message "header '<assert.h>'" "" { target *-*-* } .-2 } */
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user