mirror of
https://sourceware.org/git/binutils-gdb.git
synced 2024-12-15 04:31:49 +08:00
67eca1ccc1
I came across a table containing common misspellings [1], and wrote a script to detect and correct these misspellings. The table also contains entries that have alternatives, like this: ... addres->address, adders ... and for those the script prints a TODO instead. The script downloads the webpage containing the table, extracts the table and caches it in .git/wikipedia-common-misspellings.txt to prevent downloading it over and over again. Example usage: ... $ gdb/contrib/spellcheck.sh gdb* ... ChangeLog files are silently skipped. Checked with shellcheck. Tested on x86_64-linux, by running it on the gdb* dirs and doing a build and test run. The results of running it are in the two following patches. Reviewed-By: Andrew Burgess <aburgess@redhat.com> Approved-By: Tom Tromey <tom@tromey.com> [1] https://en.wikipedia.org/wiki/Wikipedia:Lists_of_common_misspellings/For_machines
288 lines
5.1 KiB
Bash
Executable File
288 lines
5.1 KiB
Bash
Executable File
#!/bin/bash
|
|
|
|
# Copyright (C) 2024 Free Software Foundation, Inc.
|
|
# This program is free software; you can redistribute it and/or modify
|
|
# it under the terms of the GNU General Public License as published by
|
|
# the Free Software Foundation; either version 3 of the License, or
|
|
# (at your option) any later version.
|
|
#
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
# Script to auto-correct common spelling mistakes.
|
|
#
|
|
# Example usage:
|
|
# $ ./gdb/contrib/spellcheck.sh gdb*
|
|
|
|
# Physical (symlink-free) directory that contains this script.
scriptdir="$(
    cd "$(dirname "$0")" || exit
    pwd -P
)"

# Web page holding the machine-readable table of common misspellings.
url="https://en.wikipedia.org/wiki/Wikipedia:Lists_of_common_misspellings/For_machines"

# The extracted table is cached in the .git directory two levels above
# the script directory.
cache_dir="${scriptdir}/../../.git"
cache_file="wikipedia-common-misspellings.txt"
dictionary="${cache_dir}/${cache_file}"
|
|
|
|
# Separators: space, slash, tab.
#
# The same set of word separators is expressed twice: once as an extended
# regexp alternation for grep -E, and once as a basic regexp alternation
# for sed.  The tab in the grep variant is written with ANSI-C quoting so
# that it cannot be silently turned into a space by an editor or a copy
# and paste; the sed variant relies on sed interpreting "\t" itself.
grep_separator=$' |/|\t'
sed_separator=' \|/\|\t'
|
|
|
|
# Print a one-line synopsis to stdout, naming the script by the base name
# it was invoked with.
usage ()
{
    local self
    self=$(basename "$0")

    echo "usage: $self <file|dir>+"
}
|
|
|
|
# Print $1 as an absolute path.
#
# A path that already starts with "/" is printed unchanged; anything else
# is prefixed with the physical current working directory.  The path is
# not normalized and does not have to exist.
make_absolute ()
{
    local path
    path="$1"

    if [[ "$path" != /* ]]; then
        path="$(pwd -P)/$path"
    fi

    echo "$path"
}
|
|
|
|
# Turn the command-line arguments into the list of files to check.
#
# Arguments: one or more files and/or directories.  Directories are
#            searched recursively for regular files.
# Globals:   unique_files (written) - sorted, de-duplicated list of
#            canonical file names; any path containing "ChangeLog" is
#            dropped.
# Exits:     with status 1 (message on stderr) when no argument is given
#            or when an argument is neither a file nor a directory.
parse_args ()
{
    if [ $# -eq 0 ]; then
        usage >&2
        exit 1
    fi

    # Scratch file collecting one canonical file name per line.  The trap
    # removes it on the early-exit path below.
    local files
    files=$(mktemp) || exit 1
    trap 'rm -f "$files"' EXIT

    local arg
    for arg in "$@"; do
        if [ -f "$arg" ]; then
            readlink -e "$(make_absolute "$arg")" \
                >> "$files"
        elif [ -d "$arg" ]; then
            # "+" hands many files to each readlink invocation instead of
            # forking once per file.
            find "$(make_absolute "$arg")" -type f -exec readlink -e {} + \
                >> "$files"
        else
            echo "Not a file or directory: $arg" >&2
            exit 1
        fi
    done

    mapfile -t unique_files \
        < <(sort -u "$files" \
                | grep -v ChangeLog)

    rm -f "$files"
    # Restore the default EXIT behaviour now that the scratch file is gone.
    trap - EXIT
}
|
|
|
|
# Make sure the misspelling table is available in $dictionary.
#
# If $dictionary already exists nothing is done.  Otherwise the web page
# at $url is downloaded, the text between <pre> and </pre> is extracted,
# blank lines are dropped and the result is stored in $dictionary.
#
# Globals: url (read), dictionary (read; the file it names is created).
# Exits:   with status 1 (message on stderr) if the download fails, if no
#          table could be extracted, or if the cache cannot be written.
#          In those cases no cache file is left behind, so that a later
#          run retries instead of using an empty dictionary.
get_dictionary ()
{
    if [ -f "$dictionary" ]; then
        return
    fi

    local webpage table
    webpage=$(mktemp) || exit 1
    table=$(mktemp) || { rm -f "$webpage"; exit 1; }
    trap 'rm -f "$webpage" "$table"' EXIT

    # Download web page containing table.
    if ! wget "$url" -O "$webpage"; then
        echo "Failed to download $url" >&2
        exit 1
    fi

    # Extract table from web page.  Write to a scratch file first so that
    # the cache is only created once the table is known to be usable.
    awk '/<pre>/,/<\/pre>/' "$webpage" \
        | sed 's/<pre>//;s/<\/pre>//' \
        | grep -E -v "^$" \
        > "$table"

    if [ ! -s "$table" ]; then
        echo "No misspelling table found in $url" >&2
        exit 1
    fi

    if ! cat "$table" > "$dictionary"; then
        rm -f "$dictionary"
        echo "Failed to write $dictionary" >&2
        exit 1
    fi

    rm -f "$webpage" "$table"
    trap - EXIT
}
|
|
|
|
# Split the cached table into two parallel arrays.
#
# Each line of $dictionary has the form "<misspelling>-><replacement>".
# Because the table is cut out of an HTML page, the arrow may also be
# present in its HTML-escaped form "-&gt;"; both spellings are accepted.
#
# Globals: dictionary (read),
#          words (written)        - text before the arrow, one per line,
#          replacements (written) - text after the arrow, same index.
parse_dictionary ()
{
    # A field separator longer than one character is treated by awk as a
    # regular expression.
    local arrow
    arrow='-(>|&gt;)'

    mapfile -t words \
        < <(awk -F "$arrow" '{print $1}' "$dictionary")
    mapfile -t replacements \
        < <(awk -F "$arrow" '{print $2}' "$dictionary")
}
|
|
|
|
# Print the names of those files, out of the ones given as arguments,
# that contain at least one of the misspellings in the words array.
#
# A misspelling only counts when it is delimited on both sides by a
# separator from $grep_separator or by the start/end of the line.
#
# Arguments: files to search.
# Globals:   words (read), grep_separator (read).
# Outputs:   matching file names, one per line.  Nothing is printed when
#            there are no files or no words to look for.
find_files_matching_words ()
{
    # Without file operands grep would read standard input instead.
    if [ $# -eq 0 ]; then
        return
    fi

    # Build one alternation "(w1|w2|...)" out of all words.  Empty entries
    # are skipped: an empty alternative would match every line.
    local pat word
    pat=""
    for word in "${words[@]}"; do
        if [ "$word" = "" ]; then
            continue
        fi
        if [ "$pat" = "" ]; then
            pat="$word"
        else
            pat="$pat|$word"
        fi
    done

    if [ "$pat" = "" ]; then
        return
    fi

    local sep
    sep=$grep_separator

    pat="(^|$sep)($pat)($sep|"'$)'

    grep -E \
        -l \
        -e "$pat" \
        -- \
        "$@"
}
|
|
|
|
# Print the names of those files that contain one specific misspelling.
#
# The word only counts when it is delimited on both sides by a separator
# from $grep_separator or by the start/end of the line.  The word is used
# as part of an extended regular expression, not as a literal string.
#
# Arguments: $1 - the word to look for; remaining arguments - files.
# Globals:   grep_separator (read).
# Outputs:   matching file names, one per line.  Nothing is printed when
#            no files are given.
find_files_matching_word ()
{
    local pat
    pat="$1"
    shift

    # Without file operands grep would read standard input instead.
    if [ $# -eq 0 ]; then
        return
    fi

    local sep
    sep=$grep_separator

    pat="(^|$sep)$pat($sep|"'$)'

    grep -E \
        -l \
        -e "$pat" \
        -- \
        "$@"
}
|
|
|
|
# Replace every delimited occurrence of a word in a file, in place.
#
# An occurrence is the word preceded by a separator from $sed_separator
# or the start of the line, and followed by a separator or the end of the
# line.  The delimiters themselves are kept.  This is the same notion of
# "occurrence" that the grep-based pre-filter uses, so a word that makes
# up a whole line is handled too.
#
# Arguments: $1 - word (used as part of a basic regular expression),
#            $2 - replacement text,
#            $3 - file to edit.
# Globals:   sed_separator (read).
replace_word_in_file ()
{
    local word
    word="$1"

    local replacement
    replacement="$2"

    local file
    file="$3"

    # Delimiters are captured so they can be put back by \1 and \2.
    local pre post
    pre='\(^\|'"$sed_separator"'\)'
    post='\('"$sed_separator"'\|$\)'

    local expr
    expr="s%$pre$word$post%\\1$replacement\\2%g"

    # Two adjacent occurrences share a single separator, and a global
    # substitution never rescans text it has already consumed, so one
    # pass replaces only every other occurrence in such a run.  A second
    # pass picks up the ones that were skipped.
    sed -i \
        "$expr;$expr" \
        "$file"
}
|
|
|
|
# Apply one dictionary entry to a set of files and report what happened.
#
# Arguments: $1 - misspelled word,
#            $2 - replacement; a replacement containing a comma lists
#                 several alternatives and is never applied,
#            remaining arguments - candidate files.
# Outputs (stdout):
#   nothing                                 no file contains the word
#   "TODO: WORD -> REPL"                    alternatives, manual fix needed
#   "WORD -> REPL"                          at least one file was changed
#   "TODO: WORD -> REPL: replacement failed: FILE"
#                                           FILE still contains the word
#                                           after the replacement attempt
replace_word_in_files ()
{
    local word
    word="$1"

    local replacement
    replacement="$2"

    shift 2

    local id
    id="$word -> $replacement"

    # Reduce set of files for sed to operate on.
    local -a files_matching_word
    mapfile -t files_matching_word \
        < <(find_files_matching_word "$word" "$@")

    if [ ${#files_matching_word[@]} -eq 0 ]; then
        return
    fi

    case "$replacement" in
        *,*)
            echo "TODO: $id"
            return
            ;;
    esac

    # Compare checksums taken before and after to learn whether the
    # replacement actually modified a file.
    local changed f before after
    changed=false
    for f in "${files_matching_word[@]}"; do
        before=$(md5sum "$f")

        replace_word_in_file \
            "$word" \
            "$replacement" \
            "$f"

        after=$(md5sum "$f")

        if [ "$after" != "$before" ]; then
            changed=true
        fi
    done

    if $changed; then
        echo "$id"
    fi

    # Any file that still matches was not (fully) fixed.  The message is
    # assembled by the shell rather than spliced into another program's
    # source, so quotes, backslashes or percent signs in the dictionary
    # entry are printed verbatim.
    local failed
    find_files_matching_word "$word" "${files_matching_word[@]}" \
        | while IFS= read -r failed; do
              echo "TODO: $id: replacement failed: $failed"
          done
}
|
|
|
|
# Program entry point.
#
# Arguments: the command-line arguments (files and/or directories).
#
# Collects the files to check, makes sure the dictionary is available,
# narrows the file list to files containing at least one misspelling and
# then applies the dictionary entries one by one to those files.
main ()
{
    declare -a unique_files
    parse_args "$@"

    # Nothing to check, e.g. only ChangeLog files or an empty directory.
    # Stop here: searching an empty file list would make grep read
    # standard input.
    if [ ${#unique_files[@]} -eq 0 ]; then
        return
    fi

    get_dictionary

    declare -a words
    declare -a replacements
    parse_dictionary

    # An empty dictionary yields a pattern that matches every line.
    if [ ${#words[@]} -eq 0 ]; then
        return
    fi

    # Reduce set of files for sed to operate on.
    local -a files_matching_words
    mapfile -t files_matching_words \
        < <(find_files_matching_words "${unique_files[@]}")

    if [ ${#files_matching_words[@]} -eq 0 ]; then
        return
    fi

    # words and replacements are parallel arrays; walk them by index.
    local i
    for i in "${!words[@]}"; do
        replace_word_in_files \
            "${words[$i]}" \
            "${replacements[$i]}" \
            "${files_matching_words[@]}"
    done
}
|
|
|
|
# Run the script: hand all command-line arguments to main unchanged.
main "$@"
|