mirror of
git://gcc.gnu.org/git/gcc.git
synced 2025-04-21 16:00:58 +08:00
spellcheck.h: add best_match template; implement early-reject
gcc/c/ChangeLog: * c-typeck.c: Include spellcheck-tree.h rather than spellcheck.h. gcc/cp/ChangeLog: * search.c: Include spellcheck-tree.h rather than spellcheck.h. gcc/ChangeLog: * spellcheck-tree.c: Include spellcheck-tree.h rather than spellcheck.h. (find_closest_identifier): Reimplement in terms of best_match<tree,tree>. * spellcheck-tree.h: New file. * spellcheck.c (struct edit_distance_traits<const char *>): New struct. (find_closest_string): Reimplement in terms of best_match<const char *, const char *>. * spellcheck.h (levenshtein_distance): Move prototype of tree-based overload to spellcheck-tree.h. (find_closest_identifier): Likewise. (struct edit_distance_traits<T>): New template. (class best_match): New class. From-SVN: r237471
This commit is contained in:
parent
9e990b5d19
commit
6a3f203c3c
@ -1,3 +1,20 @@
|
||||
2016-06-14 David Malcolm <dmalcolm@redhat.com>
|
||||
|
||||
* spellcheck-tree.c: Include spellcheck-tree.h rather than
|
||||
spellcheck.h.
|
||||
(find_closest_identifier): Reimplement in terms of
|
||||
best_match<tree,tree>.
|
||||
* spellcheck-tree.h: New file.
|
||||
* spellcheck.c (struct edit_distance_traits<const char *>): New
|
||||
struct.
|
||||
(find_closest_string): Reimplement in terms of
|
||||
best_match<const char *, const char *>.
|
||||
* spellcheck.h (levenshtein_distance): Move prototype of tree-based
|
||||
overload to spellcheck-tree.h.
|
||||
(find_closest_identifier): Likewise.
|
||||
(struct edit_distance_traits<T>): New template.
|
||||
(class best_match): New class.
|
||||
|
||||
2016-06-14 David Malcolm <dmalcolm@redhat.com>
|
||||
|
||||
* selftest-run-tests.c (selftest::run_tests): Call
|
||||
|
@ -1,3 +1,7 @@
|
||||
2016-06-14 David Malcolm <dmalcolm@redhat.com>
|
||||
|
||||
* c-typeck.c: Include spellcheck-tree.h rather than spellcheck.h.
|
||||
|
||||
2016-06-14 David Malcolm <dmalcolm@redhat.com>
|
||||
|
||||
* c-typeck.c (build_component_ref): Simplify fixit code by
|
||||
|
@ -47,7 +47,7 @@ along with GCC; see the file COPYING3. If not see
|
||||
#include "c-family/c-ubsan.h"
|
||||
#include "cilk.h"
|
||||
#include "gomp-constants.h"
|
||||
#include "spellcheck.h"
|
||||
#include "spellcheck-tree.h"
|
||||
#include "gcc-rich-location.h"
|
||||
|
||||
/* Possible cases of implicit bad conversions. Used to select
|
||||
|
@ -1,3 +1,7 @@
|
||||
2016-06-14 David Malcolm <dmalcolm@redhat.com>
|
||||
|
||||
* search.c: Include spellcheck-tree.h rather than spellcheck.h.
|
||||
|
||||
2016-06-14 David Malcolm <dmalcolm@redhat.com>
|
||||
|
||||
* typeck.c: Include "gcc-rich-location.h".
|
||||
|
@ -27,7 +27,7 @@ along with GCC; see the file COPYING3. If not see
|
||||
#include "cp-tree.h"
|
||||
#include "intl.h"
|
||||
#include "toplev.h"
|
||||
#include "spellcheck.h"
|
||||
#include "spellcheck-tree.h"
|
||||
|
||||
static int is_subobject_of_p (tree, tree);
|
||||
static tree dfs_lookup_base (tree, void *);
|
||||
|
@ -22,7 +22,7 @@ along with GCC; see the file COPYING3. If not see
|
||||
#include "coretypes.h"
|
||||
#include "tm.h"
|
||||
#include "tree.h"
|
||||
#include "spellcheck.h"
|
||||
#include "spellcheck-tree.h"
|
||||
#include "selftest.h"
|
||||
#include "stringpool.h"
|
||||
|
||||
@ -53,32 +53,16 @@ find_closest_identifier (tree target, const auto_vec<tree> *candidates)
|
||||
{
|
||||
gcc_assert (TREE_CODE (target) == IDENTIFIER_NODE);
|
||||
|
||||
best_match<tree, tree> bm (target);
|
||||
int i;
|
||||
tree identifier;
|
||||
tree best_identifier = NULL_TREE;
|
||||
edit_distance_t best_distance = MAX_EDIT_DISTANCE;
|
||||
FOR_EACH_VEC_ELT (*candidates, i, identifier)
|
||||
{
|
||||
gcc_assert (TREE_CODE (identifier) == IDENTIFIER_NODE);
|
||||
edit_distance_t dist = levenshtein_distance (target, identifier);
|
||||
if (dist < best_distance)
|
||||
{
|
||||
best_distance = dist;
|
||||
best_identifier = identifier;
|
||||
}
|
||||
bm.consider (identifier);
|
||||
}
|
||||
|
||||
/* If more than half of the letters were misspelled, the suggestion is
|
||||
likely to be meaningless. */
|
||||
if (best_identifier)
|
||||
{
|
||||
unsigned int cutoff = MAX (IDENTIFIER_LENGTH (target),
|
||||
IDENTIFIER_LENGTH (best_identifier)) / 2;
|
||||
if (best_distance > cutoff)
|
||||
return NULL_TREE;
|
||||
}
|
||||
|
||||
return best_identifier;
|
||||
return bm.get_best_meaningful_candidate ();
|
||||
}
|
||||
|
||||
#if CHECKING_P
|
||||
|
51
gcc/spellcheck-tree.h
Normal file
51
gcc/spellcheck-tree.h
Normal file
@ -0,0 +1,51 @@
|
||||
/* Find near-matches for identifiers.
|
||||
Copyright (C) 2015-2016 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation; either version 3, or (at your option) any later
|
||||
version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with GCC; see the file COPYING3. If not see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#ifndef GCC_SPELLCHECK_TREE_H
|
||||
#define GCC_SPELLCHECK_TREE_H
|
||||
|
||||
#include "spellcheck.h"
|
||||
|
||||
/* spellcheck-tree.c */
|
||||
|
||||
extern edit_distance_t
|
||||
levenshtein_distance (tree ident_s, tree ident_t);
|
||||
|
||||
extern tree
|
||||
find_closest_identifier (tree target, const auto_vec<tree> *candidates);
|
||||
|
||||
/* Specialization of edit_distance_traits for identifiers. */
|
||||
|
||||
template <>
|
||||
struct edit_distance_traits<tree>
|
||||
{
|
||||
static size_t get_length (tree id)
|
||||
{
|
||||
gcc_assert (TREE_CODE (id) == IDENTIFIER_NODE);
|
||||
return IDENTIFIER_LENGTH (id);
|
||||
}
|
||||
|
||||
static const char *get_string (tree id)
|
||||
{
|
||||
gcc_assert (TREE_CODE (id) == IDENTIFIER_NODE);
|
||||
return IDENTIFIER_POINTER (id);
|
||||
}
|
||||
};
|
||||
|
||||
#endif /* GCC_SPELLCHECK_TREE_H */
|
@ -121,6 +121,24 @@ levenshtein_distance (const char *s, const char *t)
|
||||
return levenshtein_distance (s, strlen (s), t, strlen (t));
|
||||
}
|
||||
|
||||
/* Specialization of edit_distance_traits for C-style strings. */
|
||||
|
||||
template <>
|
||||
struct edit_distance_traits<const char *>
|
||||
{
|
||||
static size_t get_length (const char *str)
|
||||
{
|
||||
gcc_assert (str);
|
||||
return strlen (str);
|
||||
}
|
||||
|
||||
static const char *get_string (const char *str)
|
||||
{
|
||||
gcc_assert (str);
|
||||
return str;
|
||||
}
|
||||
};
|
||||
|
||||
/* Given TARGET, a non-NULL string, and CANDIDATES, a non-NULL ptr to
|
||||
an autovec of non-NULL strings, determine which element within
|
||||
CANDIDATES has the lowest edit distance to TARGET. If there are
|
||||
@ -139,32 +157,14 @@ find_closest_string (const char *target,
|
||||
|
||||
int i;
|
||||
const char *candidate;
|
||||
const char *best_candidate = NULL;
|
||||
edit_distance_t best_distance = MAX_EDIT_DISTANCE;
|
||||
size_t len_target = strlen (target);
|
||||
best_match<const char *, const char *> bm (target);
|
||||
FOR_EACH_VEC_ELT (*candidates, i, candidate)
|
||||
{
|
||||
gcc_assert (candidate);
|
||||
edit_distance_t dist
|
||||
= levenshtein_distance (target, len_target,
|
||||
candidate, strlen (candidate));
|
||||
if (dist < best_distance)
|
||||
{
|
||||
best_distance = dist;
|
||||
best_candidate = candidate;
|
||||
}
|
||||
bm.consider (candidate);
|
||||
}
|
||||
|
||||
/* If more than half of the letters were misspelled, the suggestion is
|
||||
likely to be meaningless. */
|
||||
if (best_candidate)
|
||||
{
|
||||
unsigned int cutoff = MAX (len_target, strlen (best_candidate)) / 2;
|
||||
if (best_distance > cutoff)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return best_candidate;
|
||||
return bm.get_best_meaningful_candidate ();
|
||||
}
|
||||
|
||||
#if CHECKING_P
|
||||
|
110
gcc/spellcheck.h
110
gcc/spellcheck.h
@ -35,12 +35,112 @@ extern const char *
|
||||
find_closest_string (const char *target,
|
||||
const auto_vec<const char *> *candidates);
|
||||
|
||||
/* spellcheck-tree.c */
|
||||
/* A traits class for describing a string-like type usable by
|
||||
class best_match.
|
||||
Specializations should provide the implementations of the following:
|
||||
|
||||
extern edit_distance_t
|
||||
levenshtein_distance (tree ident_s, tree ident_t);
|
||||
static size_t get_length (TYPE);
|
||||
static const char *get_string (TYPE);
|
||||
|
||||
extern tree
|
||||
find_closest_identifier (tree target, const auto_vec<tree> *candidates);
|
||||
get_string should return a non-NULL ptr, which does not need to be
|
||||
0-terminated. */
|
||||
|
||||
/* Primary template: it has no members of its own.  The get_length and
   get_string operations for a given TYPE come from an explicit
   specialization of this struct.  */
template <typename TYPE>
struct edit_distance_traits
{
};
|
||||
|
||||
/* A type for use when determining the best match against a string,
|
||||
expressed as a template so that we can match against various
|
||||
string-like types (const char *, frontend identifiers, and preprocessor
|
||||
macros).
|
||||
|
||||
This type accumulates the best possible match against GOAL_TYPE for
|
||||
a sequence of elements of CANDIDATE_TYPE, whilst minimizing the
|
||||
number of calls to levenshtein_distance and to
|
||||
edit_distance_traits<T>::get_length. */
|
||||
|
||||
template <typename GOAL_TYPE, typename CANDIDATE_TYPE>
|
||||
class best_match
|
||||
{
|
||||
public:
|
||||
typedef GOAL_TYPE goal_t;
|
||||
typedef CANDIDATE_TYPE candidate_t;
|
||||
typedef edit_distance_traits<goal_t> goal_traits;
|
||||
typedef edit_distance_traits<candidate_t> candidate_traits;
|
||||
|
||||
/* Constructor. */
|
||||
|
||||
best_match (goal_t goal)
|
||||
: m_goal (goal_traits::get_string (goal)),
|
||||
m_goal_len (goal_traits::get_length (goal)),
|
||||
m_best_candidate (NULL),
|
||||
m_best_distance (MAX_EDIT_DISTANCE)
|
||||
{}
|
||||
|
||||
/* Compare the edit distance between CANDIDATE and m_goal,
|
||||
and if it's the best so far, record it. */
|
||||
|
||||
void consider (candidate_t candidate)
|
||||
{
|
||||
size_t candidate_len = candidate_traits::get_length (candidate);
|
||||
|
||||
/* Calculate a lower bound on the candidate's distance to the goal,
|
||||
based on the difference in lengths; it will require at least
|
||||
this many insertions/deletions. */
|
||||
edit_distance_t min_candidate_distance
|
||||
= abs ((ssize_t)candidate_len - (ssize_t)m_goal_len);
|
||||
|
||||
/* If the candidate's length is sufficiently different to that
|
||||
of the goal string, then the number of insertions/deletions
|
||||
may be >= the best distance so far. If so, we can reject
|
||||
the candidate immediately without needing to compute
|
||||
the exact distance, since it won't be an improvement. */
|
||||
if (min_candidate_distance >= m_best_distance)
|
||||
return;
|
||||
|
||||
/* If the candidate will be unable to beat the criterion in
|
||||
get_best_meaningful_candidate, reject it without computing
|
||||
the exact distance. */
|
||||
unsigned int cutoff = MAX (m_goal_len, candidate_len) / 2;
|
||||
if (min_candidate_distance > cutoff)
|
||||
return;
|
||||
|
||||
/* Otherwise, compute the distance and see if the candidate
|
||||
has beaten the previous best value. */
|
||||
edit_distance_t dist
|
||||
= levenshtein_distance (m_goal, m_goal_len,
|
||||
candidate_traits::get_string (candidate),
|
||||
candidate_len);
|
||||
if (dist < m_best_distance)
|
||||
{
|
||||
m_best_distance = dist;
|
||||
m_best_candidate = candidate;
|
||||
m_best_candidate_len = candidate_len;
|
||||
}
|
||||
}
|
||||
|
||||
/* Get the best candidate so far, but applying a filter to ensure
|
||||
that we return NULL if none of the candidates are close to the goal,
|
||||
to avoid offering nonsensical suggestions to the user. */
|
||||
|
||||
candidate_t get_best_meaningful_candidate () const
|
||||
{
|
||||
/* If more than half of the letters were misspelled, the suggestion is
|
||||
likely to be meaningless. */
|
||||
if (m_best_candidate)
|
||||
{
|
||||
unsigned int cutoff = MAX (m_goal_len, m_best_candidate_len) / 2;
|
||||
if (m_best_distance > cutoff)
|
||||
return NULL;
|
||||
}
|
||||
return m_best_candidate;
|
||||
}
|
||||
|
||||
private:
|
||||
const char *m_goal;
|
||||
size_t m_goal_len;
|
||||
candidate_t m_best_candidate;
|
||||
edit_distance_t m_best_distance;
|
||||
size_t m_best_candidate_len;
|
||||
};
|
||||
|
||||
#endif /* GCC_SPELLCHECK_H */
|
||||
|
Loading…
x
Reference in New Issue
Block a user