2024-10-06 13:59:48 +08:00
|
|
|
#!/bin/bash
|
|
|
|
|
|
|
|
# Copyright (C) 2024 Free Software Foundation, Inc.
|
|
|
|
# This program is free software; you can redistribute it and/or modify
|
|
|
|
# it under the terms of the GNU General Public License as published by
|
|
|
|
# the Free Software Foundation; either version 3 of the License, or
|
|
|
|
# (at your option) any later version.
|
|
|
|
#
|
|
|
|
# This program is distributed in the hope that it will be useful,
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
# GNU General Public License for more details.
|
|
|
|
#
|
|
|
|
# You should have received a copy of the GNU General Public License
|
|
|
|
# along with this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
|
|
|
|
# Script to auto-correct common spelling mistakes.
|
|
|
|
#
|
|
|
|
# Example usage:
|
|
|
|
# $ ./gdb/contrib/spellcheck.sh gdb*
|
|
|
|
|
|
|
|
# Absolute, symlink-free directory containing this script.  CDPATH is
# cleared and cd's stdout discarded so that a relative $0 combined with a
# user-set CDPATH cannot make cd print a directory into the substitution.
scriptdir=$(CDPATH='' cd "$(dirname "$0")" > /dev/null || exit; pwd -P)
# Absolute path of this script; it is hashed into the pattern cache id.
this_script=$scriptdir/$(basename "$0")
|
2024-10-06 13:59:48 +08:00
|
|
|
|
|
|
|
# Web page holding the machine-readable list of common misspellings.
url='https://en.wikipedia.org/wiki/Wikipedia:Lists_of_common_misspellings/For_machines'

# Downloaded and generated files are kept in the .git directory found two
# levels above the script directory.
cache_dir="${scriptdir}/../../.git"
cache_file='wikipedia-common-misspellings.txt'
dictionary="${cache_dir}/${cache_file}"

# Dictionary shipped next to the script.
local_dictionary="${scriptdir}/common-misspellings.txt"

# Basename of the cached grep pattern; a checksum suffix is appended to it.
cache_file2='spell-check.pat1'
|
2024-10-06 13:59:48 +08:00
|
|
|
|
2024-10-08 14:24:13 +08:00
|
|
|
# Separators: space, slash, tab, colon, comma, double quote.
#
# grep_separators holds the characters as used in the grep -E patterns,
# sed_separators the equivalent spellings for the sed expressions.  The tab
# in the grep list is written as $'\t' so that it stays a real tab even if
# whitespace in this file gets converted.
declare -a grep_separators
grep_separators=(
    " "
    "/"
    $'\t'
    ":"
    ","
    "\""
)
declare -a sed_separators
sed_separators=(
    " "
    "/"
    "\t"
    ":"
    ","
    "\""
)
|
|
|
|
|
2024-11-14 04:06:58 +08:00
|
|
|
# Pre: start of line, left parenthesis.
# These may precede a word in addition to the separators; the parenthesis
# is backslash-escaped in the grep list and left bare in the sed list.
declare -a grep_pre=('^' '\(')
declare -a sed_pre=('^' '(')
|
|
|
|
|
|
|
|
# Post: dot, right parenthesis, end of line.
# These may follow a word in addition to the separators; the parenthesis
# is backslash-escaped in the grep list and left bare in the sed list.
declare -a grep_post=('\.' '\)' '$')
declare -a sed_post=('\.' ')' '$')
|
|
|
|
|
2024-10-08 14:24:13 +08:00
|
|
|
# Join the arguments following $1 into a single string, using $1 as the
# glue between consecutive items, and print the result on stdout.
# With no items an empty line is printed.
join ()
{
    local or="$1"
    shift

    local res=""
    # Keep the loop variable local so that callers' variables are untouched.
    local item

    if [ $# -gt 0 ]; then
        res="$1"
        shift
        for item in "$@"; do
            res="$res$or$item"
        done
    fi

    # printf rather than echo: an item such as "-n" must be printed verbatim.
    printf '%s\n' "$res"
}
|
|
|
|
|
|
|
|
# Alternation operator as spelled in grep -E patterns and in sed
# expressions respectively.
grep_or='|'
sed_or='\|'
|
2024-10-21 21:19:25 +08:00
|
|
|
|
|
|
|
# Print a parenthesized grep -E alternation of all arguments: (a|b|c).
grep_join ()
{
    local res
    # Quote the operator so it is passed as exactly one word, independent
    # of IFS and pathname expansion.
    res=$(join "$grep_or" "$@")
    echo "($res)"
}
|
|
|
|
|
|
|
|
# Print a sed group holding the alternation of all arguments: \(a\|b\|c\).
sed_join ()
{
    local res
    # Quote the operator so it is passed as exactly one word, independent
    # of IFS and pathname expansion.
    res=$(join "$sed_or" "$@")
    echo "\($res\)"
}
|
2024-10-06 13:59:48 +08:00
|
|
|
|
|
|
|
# Print the two supported invocation forms on stdout.
usage ()
{
    local prog
    prog=$(basename "$0")

    printf '%s\n' \
        "usage: $prog [--check] <file|dir>+" \
        " $prog --print-dictionary"
}
|
|
|
|
|
|
|
|
# Print $1 as an absolute path: a path starting with a slash is printed
# unchanged, anything else is prefixed with the physical working directory.
make_absolute ()
{
    if [[ $1 == /* ]]; then
        echo "$1"
        return
    fi

    echo "$(pwd -P)/$1"
}
|
|
|
|
|
|
|
|
# Parse the command line.
#
# Sets print_dictionary=true for a lone --print-dictionary, sets check=true
# for each leading --check, and fills the array unique_files with the
# resolved, de-duplicated regular files named by, or found below, the
# remaining arguments.  Files whose path contains "ChangeLog" are dropped.
# Prints the usage text and exits with status 1 when no file or directory
# is given, and exits with status 1 on an argument that is neither.
parse_args ()
{
    # Handle this mode before creating any temporary file: returning with
    # the file and its EXIT trap in place would leak the file, because the
    # trap refers to a local that no longer exists when the script exits.
    if [ $# -eq 1 ] && [ "$1" = "--print-dictionary" ]; then
        print_dictionary=true
        return
    fi

    while [ $# -gt 0 ]; do
        case "$1" in
            --check)
                check=true
                shift
                ;;
            *)
                break
                ;;
        esac
    done

    if [ $# -eq 0 ]; then
        usage
        exit 1
    fi

    local files
    files=$(mktemp)
    trap 'rm -f "$files"' EXIT

    local arg
    for arg in "$@"; do
        if [ -f "$arg" ]; then
            arg=$(make_absolute "$arg")
            readlink -e "$arg" \
                >> "$files"
        elif [ -d "$arg" ]; then
            arg=$(make_absolute "$arg")
            find "$arg" -type f -exec readlink -e {} \; \
                >> "$files"
        else
            echo "Not a file or directory: $arg" >&2
            exit 1
        fi
    done

    mapfile -t unique_files \
        < <(sort -u "$files" \
                | grep -v ChangeLog)

    # The list is consumed; remove the file and reset the trap to default.
    rm -f "$files"
    trap - EXIT
}
|
|
|
|
|
|
|
|
# Make sure the cached dictionary file $dictionary exists, downloading and
# extracting it from $url when it does not.
#
# Returns non-zero, leaving no cache file behind, when the download fails
# or when no dictionary lines can be extracted.  This way a failed attempt
# is retried on the next run instead of being cached as an empty
# dictionary.
get_dictionary ()
{
    if [ -f "$dictionary" ]; then
        return
    fi

    local webpage
    webpage=$(mktemp)
    trap 'rm -f "$webpage"' EXIT

    # Download web page containing table.
    if ! wget "$url" -O "$webpage"; then
        echo "Failed to download $url" >&2
        rm -f "$webpage"
        trap - EXIT
        return 1
    fi

    # Extract table from web page.
    awk '/<pre>/,/<\/pre>/' "$webpage" \
        | sed 's/<pre>//;s/<\/pre>//' \
        | grep -E -v "^$" \
        > "$dictionary"

    rm -f "$webpage"
    trap - EXIT

    # An empty result means the page did not contain the expected table.
    if [ ! -s "$dictionary" ]; then
        echo "No dictionary found in $url" >&2
        rm -f "$dictionary"
        return 1
    fi
}
|
|
|
|
|
2024-10-08 14:24:13 +08:00
|
|
|
# Print the local dictionary, leaving out comment lines (those starting
# with '#') and empty lines.
output_local_dictionary ()
{
    local skip='^#|^$'

    grep -E -v "$skip" "$local_dictionary"
}
|
|
|
|
|
|
|
|
# Print the local dictionary followed by the cached one, dropping every
# line that contains an upper-case ASCII letter.
output_dictionaries ()
{
    {
        output_local_dictionary
        cat "$dictionary"
    } | grep -E -v "[A-Z]"
}
|
|
|
|
|
2024-10-06 13:59:48 +08:00
|
|
|
# Split the "word->replacement" lines printed by output_dictionaries into
# the parallel arrays words and replacements.  Rules that must not be
# applied are blanked out in both arrays rather than removed, so that the
# indices of the two arrays keep matching.
parse_dictionary ()
{
    mapfile -t words \
        < <(output_dictionaries | awk -F '->' '{print $1}')
    mapfile -t replacements \
        < <(output_dictionaries | awk -F '->' '{print $2}')

    local -A seen
    local idx word replacement drop
    for idx in "${!words[@]}"; do
        word=${words[$idx]}
        replacement=${replacements[$idx]}
        drop=false

        if [ "${seen[$word]}" == 1 ]; then
            # An earlier rule for this word wins.  The local dictionary is
            # printed first, so it overrides the wiki dictionary.
            drop=true
        else
            seen[$word]=1
            # Identity rules have nothing to replace.
            if [ "$word" = "$replacement" ]; then
                drop=true
            fi
        fi

        if $drop; then
            words[$idx]=""
            replacements[$idx]=""
        fi
    done
}
|
|
|
|
|
2024-11-18 16:42:04 +08:00
|
|
|
# Print every active rule as "word -> replacement", one per line.  Entries
# whose word is empty are skipped.
print_dictionary ()
{
    local idx
    for idx in "${!words[@]}"; do
        if [ "${words[$idx]}" == "" ]; then
            continue
        fi

        printf '%s -> %s\n' "${words[$idx]}" "${replacements[$idx]}"
    done
}
|
|
|
|
|
2024-10-06 13:59:48 +08:00
|
|
|
# Print those of the files given as arguments that contain at least one of
# the dictionary words, either as listed or with its first letter in upper
# case, delimited by the configured pre/separator/post tokens.
#
# The combined grep pattern is cached in $cache_dir.  The cache file name
# carries a checksum over both dictionaries and this script, so a change
# to any of them produces a new pattern and removes the stale ones.
#
# Returns 1 without output when called without files.
find_files_matching_words ()
{
    # Without file operands grep would read standard input and appear to
    # hang; an empty file list simply has no matches.
    if [ $# -eq 0 ]; then
        return 1
    fi

    local cache_id
    cache_id=$(cat "$local_dictionary" "$dictionary" "$this_script" \
                   | md5sum \
                   | awk '{print $1}')

    local patfile
    patfile="$cache_dir/$cache_file2".$cache_id

    local pat
    if [ -f "$patfile" ]; then
        pat=$(cat "$patfile")
    else
        rm -f "$cache_dir/$cache_file2".*

        # Turn each active word "abc" into "[Aa]bc" (\u is a GNU sed
        # extension that upper-cases the next character).
        local -a re_words
        local w
        mapfile -t re_words \
            < <(for w in "${words[@]}"; do
                    if [ "$w" = "" ]; then
                        continue
                    fi
                    echo "$w"
                done \
                    | sed "s/^\(.\)/[\u\1\1]/")

        pat=$(grep_join "${re_words[@]}")

        local before after
        before=$(grep_join \
                     "${grep_pre[@]}" \
                     "${grep_separators[@]}")
        after=$(grep_join \
                    "${grep_separators[@]}" \
                    "${grep_post[@]}")

        pat="$before$pat$after"

        echo "$pat" \
            > "$patfile"
    fi

    grep -E \
        -l \
        "$pat" \
        "$@"
}
|
|
|
|
|
|
|
|
# Print those of the files in $2... that contain the word $1, either as
# given or with its first letter in upper case, delimited by the
# configured pre/separator/post tokens.
find_files_matching_word ()
{
    local pat
    pat="$1"
    shift

    local before after
    before=$(grep_join \
                 "${grep_pre[@]}" \
                 "${grep_separators[@]}")
    after=$(grep_join \
                "${grep_separators[@]}" \
                "${grep_post[@]}")

    # ${pat^} upper-cases the first character and, unlike ${pat@u}, is
    # available in every bash version that provides mapfile.
    pat="(${pat^}|$pat)"

    pat="$before$pat$after"

    grep -E \
        -l \
        "$pat" \
        "$@"
}
|
|
|
|
|
|
|
|
# Replace the word $1 by $2 in file $3, editing the file in place.
#
# Two substitutions are applied: one for the word as given and one for the
# word with its first letter in upper case, which gets the replacement
# with its first letter in upper case.  A word is only replaced when it is
# delimited by the configured pre/separator/post tokens; those delimiters
# are captured and written back unchanged.
replace_word_in_file ()
{
    local word
    word="$1"

    local replacement
    replacement="$2"

    local file
    file="$3"

    local before after
    before=$(sed_join \
                 "${sed_pre[@]}" \
                 "${sed_separators[@]}")
    after=$(sed_join \
                "${sed_separators[@]}" \
                "${sed_post[@]}")

    # ${var^} upper-cases the first character and, unlike ${var@u}, is
    # available in every bash version that provides mapfile.
    local repl1
    local repl2
    repl1="s%$before$word$after%\1$replacement\2%g"
    repl2="s%$before${word^}$after%\1${replacement^}\2%g"

    sed -i \
        "$repl1;$repl2" \
        "$file"
}
|
|
|
|
|
|
|
|
# Replace the word $1 by $2 in those of the files $3... that contain it.
#
# Output on stdout:
# - "TODO: word -> replacement" when the replacement contains a comma;
#   nothing is changed in that case.
# - "word -> replacement" when at least one file was modified.
# - "TODO: word -> replacement: replacement failed: FILE" for every file
#   that still matches the word after the replacement attempt.
replace_word_in_files ()
{
    local word
    word="$1"

    local replacement
    replacement="$2"

    shift 2

    local id
    id="$word -> $replacement"

    # Reduce set of files for sed to operate on.
    local -a files_matching_word
    mapfile -t files_matching_word \
        < <(find_files_matching_word "$word" "$@")

    if [ ${#files_matching_word[@]} -eq 0 ]; then
        return
    fi

    if [[ $replacement == *,* ]]; then
        echo "TODO: $id"
        return
    fi

    # Compare checksums taken around the edit to see whether sed changed
    # anything.
    local changed f before after
    changed=false
    for f in "${files_matching_word[@]}"; do
        before=$(md5sum "$f")

        replace_word_in_file \
            "$word" \
            "$replacement" \
            "$f"

        after=$(md5sum "$f")

        if [ "$after" != "$before" ]; then
            changed=true
        fi
    done

    if $changed; then
        printf '%s\n' "$id"
    fi

    # Pass the rule text to printf as data: dictionary text must not end up
    # in a program text or a format string, where characters such as '%',
    # '"' or '\' would be interpreted.
    local failed
    while IFS= read -r failed; do
        printf 'TODO: %s: replacement failed: %s\n' "$id" "$failed"
    done < <(find_files_matching_word "$word" "${files_matching_word[@]}")
}
|
|
|
|
|
|
|
|
# Script driver: parse the command line, load the dictionaries, and then
# either print the dictionary, report whether any file needs fixing
# (--check, exit status 1 if so), or apply every active rule to the files
# that contain a misspelled word.
main ()
{
    local -a unique_files
    check=false
    print_dictionary=false
    parse_args "$@"

    get_dictionary

    local -a words
    local -a replacements
    parse_dictionary

    if $print_dictionary; then
        print_dictionary
        exit 0
    fi

    # Reduce set of files for sed to operate on.
    local -a files_matching_words
    mapfile -t files_matching_words \
        < <(find_files_matching_words "${unique_files[@]}")

    # Nothing to report or fix.
    if [ ${#files_matching_words[@]} -eq 0 ]; then
        return
    fi

    # In check mode a match is only reported through the exit status.
    if $check; then
        exit 1
    fi

    local idx
    for idx in "${!words[@]}"; do
        # Blanked-out entries are rules that were disabled while parsing.
        if [ "${words[$idx]}" = "" ]; then
            continue
        fi

        replace_word_in_files \
            "${words[$idx]}" \
            "${replacements[$idx]}" \
            "${files_matching_words[@]}"
    done
}
|
|
|
|
|
|
|
|
# Run the script, handing over the command-line arguments unchanged.
main "$@"
|