binutils-gdb/gdb/contrib/spellcheck.sh

495 lines
8.3 KiB
Bash
Raw Normal View History

#!/bin/bash
# Copyright (C) 2024 Free Software Foundation, Inc.
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# Script to auto-correct common spelling mistakes.
#
# Example usage:
# $ ./gdb/contrib/spellcheck.sh gdb*
# Directory holding this script, with symbolic links resolved.  Every other
# path below is derived from it, so give up if it cannot be determined.  The
# check has to sit outside the command substitution: an "exit" inside it
# would only terminate the subshell and leave scriptdir empty.
scriptdir=$(cd "$(dirname "$0")" && pwd -P) || exit 1

# Full path of this script; its contents are part of the pattern cache key.
this_script=$scriptdir/$(basename "$0")

# Web page the misspelling table is downloaded from.
url=https://en.wikipedia.org/wiki/Wikipedia:Lists_of_common_misspellings/For_machines

# Downloaded dictionary and generated grep pattern are cached in the .git
# directory two levels above the script.
cache_dir=$scriptdir/../../.git
cache_file=wikipedia-common-misspellings.txt
dictionary=$cache_dir/$cache_file

# Dictionary shipped next to the script.
local_dictionary=$scriptdir/common-misspellings.txt

# Base name of the cached grep pattern; a checksum suffix is appended to it.
cache_file2=spell-check.pat1
# Separators: space, slash, tab, colon, comma, double quote.
#
# The grep list holds the characters themselves.  The tab is written as
# $'\t' so that it stays visible and survives editors or tools that convert
# tabs to spaces.
declare -a grep_separators
grep_separators=(
    " "
    "/"
    $'\t'
    ":"
    ","
    "\""
)

# Same separators for use in sed scripts; here the tab is spelled as the
# two-character escape \t.
declare -a sed_separators
sed_separators=(
    " "
    "/"
    "\t"
    ":"
    ","
    "\""
)
# Pre: what may precede a word besides a separator, i.e. start of line or
# left parenthesis.  The parenthesis is escaped for grep only.
declare -a grep_pre=(
    '^'
    '\('
)
declare -a sed_pre=(
    '^'
    '('
)

# Post: what may follow a word besides a separator, i.e. dot, right
# parenthesis or end of line.  The parenthesis is escaped for grep only.
declare -a grep_post=(
    '\.'
    '\)'
    '$'
)
declare -a sed_post=(
    '\.'
    ')'
    '$'
)
# Join strings with a separator.
#
# Arguments: $1      - separator placed between consecutive items
#            $2 ...  - items to join
# Outputs:   the joined string on stdout, followed by a newline (an empty
#            line when there are no items)
join ()
{
    local or
    or="$1"
    shift

    local res
    res=""

    # Keep the loop variable local so that the caller's variables are left
    # alone.
    local item

    if [ $# -gt 0 ]; then
        res="$1"
        shift
        for item in "$@"; do
            res="$res$or$item"
        done
    fi

    # printf rather than echo: echo would treat a result such as "-n" as an
    # option instead of printing it.
    printf '%s\n' "$res"
}
# Alternation operator as written in the patterns handed to "grep -E" ...
grep_or='|'
# ... and as written in the sed scripts, where the bar needs a backslash.
sed_or='\|'
# Build a parenthesised alternation for "grep -E" from the arguments.
#
# Arguments: $@ - alternatives, already written as regexp fragments
# Outputs:   "(alt1|alt2|...)" on stdout
grep_join ()
{
    local res
    # Quote the separator so that it reaches join as exactly one argument,
    # whatever IFS happens to be.
    res=$(join "$grep_or" "$@")
    printf '%s\n' "($res)"
}
# Build a grouped alternation for a sed script from the arguments.
#
# Arguments: $@ - alternatives, already written as regexp fragments
# Outputs:   "\(alt1\|alt2\|...\)" on stdout
sed_join ()
{
    local res
    # The separator contains a backslash; quote it so that it is passed to
    # join verbatim, without word splitting or pathname expansion.
    res=$(join "$sed_or" "$@")
    printf '%s\n' "\($res\)"
}
# Print a two-line synopsis of the supported invocations on stdout.
usage ()
{
    # Name the script was invoked under, without its directory part.
    local prog
    prog=$(basename "$0")

    echo "usage: $prog [--check] <file|dir>+"
    echo " $prog --print-dictionary"
}
# Turn a path into an absolute one.
#
# Arguments: $1 - path, absolute or relative to the current directory
# Outputs:   $1 unchanged if it starts with a slash, otherwise $1 prefixed
#            with the physical current directory
make_absolute ()
{
    local path=$1

    if [[ $path != /* ]]; then
        path="$(pwd -P)/$path"
    fi

    printf '%s\n' "$path"
}
# Parse the command line.
#
# Arguments: $@ - the script's arguments: either the single option
#                 --print-dictionary, or any number of leading --check
#                 options followed by one or more files or directories
# Globals:   print_dictionary (set to true for --print-dictionary)
#            check            (set to true for --check)
#            unique_files     (set to the sorted, de-duplicated list of
#                              canonical file names, ChangeLog files excluded)
# Exits with status 1, after a message on stderr, when no file or directory
# is given or when an argument is neither a file nor a directory.
parse_args ()
{
    if [ $# -eq 1 ] && [ "$1" = "--print-dictionary" ]; then
        print_dictionary=true
        return
    fi

    while [ "$1" = "--check" ]; do
        check=true
        shift
    done

    if [ $# -eq 0 ]; then
        usage >&2
        exit 1
    fi

    # Scratch file collecting the canonical names of all files.  It is only
    # created once it is known to be needed, so that the early return above
    # does not leave it behind.
    local files
    files=$(mktemp) || exit 1
    trap 'rm -f "$files"' EXIT

    local arg
    for arg in "$@"; do
        if [ -f "$arg" ]; then
            arg=$(make_absolute "$arg")
            readlink -e "$arg" \
                     >> "$files"
        elif [ -d "$arg" ]; then
            arg=$(make_absolute "$arg")
            # One readlink invocation handles many files.
            find "$arg" -type f -exec readlink -e {} + \
                 >> "$files"
        else
            echo "Not a file or directory: $arg" >&2
            exit 1
        fi
    done

    mapfile -t unique_files \
            < <(sort -u "$files" \
                    | grep -v ChangeLog)

    rm -f "$files"
    trap - EXIT
}
# Make sure the downloaded dictionary is present in the cache.
#
# Globals:   dictionary (read) - cache file; nothing is done if it exists
#            url (read)        - web page containing the table
# Exits with status 1, after a message on stderr, when the page cannot be
# downloaded or contains no table.  In that case no cache file is written:
# an empty cache file would otherwise be picked up by every later run.
get_dictionary ()
{
    if [ -f "$dictionary" ]; then
        return
    fi

    local webpage table
    webpage=$(mktemp) || exit 1
    table=$(mktemp) || { rm -f "$webpage"; exit 1; }
    trap 'rm -f "$webpage" "$table"' EXIT

    # Download web page containing table.
    if ! wget "$url" -O "$webpage"; then
        echo "Failed to download $url" >&2
        rm -f "$webpage" "$table"
        exit 1
    fi

    # Extract table from web page: keep the lines from <pre> up to </pre>,
    # strip the tags themselves and drop lines that end up empty.  The
    # result goes to a scratch file first so that it can be checked.
    awk '/<pre>/,/<\/pre>/' "$webpage" \
        | sed 's/<pre>//;s/<\/pre>//' \
        | grep -E -v "^$" \
               > "$table"

    if [ ! -s "$table" ]; then
        echo "No dictionary table found in $url" >&2
        rm -f "$webpage" "$table"
        exit 1
    fi

    if ! cat "$table" > "$dictionary"; then
        rm -f "$webpage" "$table"
        exit 1
    fi

    rm -f "$webpage" "$table"
    trap - EXIT
}
# Print the rules of the local dictionary on stdout.
#
# Globals: local_dictionary (read) - file to print
output_local_dictionary ()
{
    # Lines starting with '#' are comments; those and empty lines are not
    # rules.
    grep -v -e '^#' -e '^$' "$local_dictionary"
}
# Print the rules of both dictionaries on stdout, the local ones first and
# the downloaded ones after them.  Rules containing a character matched by
# [A-Z] are left out.
#
# Globals: dictionary (read) - downloaded dictionary
output_dictionaries ()
{
    {
        output_local_dictionary
        cat "$dictionary"
    } | grep -v '[A-Z]'
}
# Load the dictionaries into two parallel arrays.
#
# Globals: words (written)        - text before the first "->" of each rule
#          replacements (written) - text after it, at the same index
# Rules that must not be applied keep their slot, but both the word and the
# replacement are set to the empty string.
parse_dictionary ()
{
    # Left-hand side of every rule.
    mapfile -t words \
            < <(output_dictionaries | awk -F '->' '{print $1}')
    # Right-hand side of every rule, in the same order.
    mapfile -t replacements \
            < <(output_dictionaries | awk -F '->' '{print $2}')

    local -A seen
    local idx entry fix drop
    for idx in "${!words[@]}"; do
        entry=${words[$idx]}
        fix=${replacements[$idx]}

        if [ "${seen[$entry]}" == 1 ]; then
            # An earlier rule already covers this word.  The local
            # dictionary is read first, so its rules win over the
            # downloaded ones.
            drop=true
        else
            seen[$entry]=1
            # A rule replacing a word by itself is never applied; it only
            # serves to mask later rules for the same word.
            if [ "$entry" = "$fix" ]; then
                drop=true
            else
                drop=false
            fi
        fi

        if $drop; then
            words[$idx]=""
            replacements[$idx]=""
        fi
    done
}
# Print every rule whose word is not empty as "word -> replacement", one
# per line.
#
# Globals: words (read), replacements (read)
print_dictionary ()
{
    # Iterate over a densely indexed copy, so that the N-th word is paired
    # with replacements[N].
    local -a entries=("${words[@]}")
    local pos
    for pos in "${!entries[@]}"; do
        [ -n "${entries[$pos]}" ] || continue
        printf '%s -> %s\n' "${entries[$pos]}" "${replacements[$pos]}"
    done
}
# List the files that contain at least one dictionary word.
#
# Arguments: $@ - files to search
# Globals:   words, grep_pre, grep_separators, grep_post (read)
#            local_dictionary, dictionary, this_script (read, cache key)
#            cache_dir, cache_file2 (read, location of the pattern cache)
# Outputs:   names of the matching files on stdout, one per line
# Returns:   status of grep; 1 when called without files
find_files_matching_words ()
{
    # Without file operands grep would search standard input instead, and
    # either wait for input or report "(standard input)".
    if [ $# -eq 0 ]; then
        return 1
    fi

    # The pattern only depends on the dictionaries and on this script, so
    # it is cached under a checksum of those three files.
    local cache_id
    cache_id=$(cat "$local_dictionary" "$dictionary" "$this_script" \
                   | md5sum \
                   | awk '{print $1}')

    local patfile
    patfile="$cache_dir/$cache_file2".$cache_id

    local pat
    if [ -f "$patfile" ]; then
        pat=$(< "$patfile")
    else
        # Discard patterns cached for other checksums.
        rm -f "$cache_dir/$cache_file2".*

        # Skip the empty slots in the word list, and let the first
        # character of each word match in either case: "teh" becomes
        # "[Tt]eh".
        local -a re_words
        mapfile -t re_words \
                < <(printf '%s\n' "${words[@]}" \
                        | grep -v '^$' \
                        | sed "s/^\(.\)/[\u\1\1]/")

        pat=$(grep_join "${re_words[@]}")

        local before after
        before=$(grep_join \
                     "${grep_pre[@]}" \
                     "${grep_separators[@]}")
        after=$(grep_join \
                    "${grep_separators[@]}" \
                    "${grep_post[@]}")

        pat="$before$pat$after"

        printf '%s\n' "$pat" \
               > "$patfile"
    fi

    grep -E \
         -l \
         -e "$pat" \
         -- "$@"
}
# List the files that contain a given word.
#
# Arguments: $1      - word to look for; it is used as part of an extended
#                      regular expression
#            $2 ...  - files to search
# Globals:   grep_pre, grep_separators, grep_post (read)
# Outputs:   names of the matching files on stdout, one per line
# Returns:   status of grep
find_files_matching_word ()
{
    local pat
    pat="$1"
    shift

    local before after
    before=$(grep_join \
                 "${grep_pre[@]}" \
                 "${grep_separators[@]}")
    after=$(grep_join \
                "${grep_separators[@]}" \
                "${grep_post[@]}")

    # Match the word as given or with its first character in upper case.
    # ${pat^} does the same as ${pat@u} but is also understood by bash
    # versions older than 5.1.
    pat="(${pat^}|$pat)"

    pat="$before$pat$after"

    grep -E \
         -l \
         -e "$pat" \
         -- "$@"
}
# Replace a word in a file, in place.
#
# Arguments: $1 - word to replace, taken literally
#            $2 - replacement text, taken literally
#            $3 - file to edit
# Globals:   sed_pre, sed_separators, sed_post (read)
#
# Both the word as given and the word with its first character in upper
# case are handled; the latter is replaced by the replacement with its
# first character in upper case.  An occurrence only counts when it is
# delimited by a separator, pre or post pattern on either side.
replace_word_in_file ()
{
    local word
    word="$1"

    local replacement
    replacement="$2"

    local file
    file="$3"

    local before after
    before=$(sed_join \
                 "${sed_pre[@]}" \
                 "${sed_separators[@]}")
    after=$(sed_join \
                "${sed_separators[@]}" \
                "${sed_post[@]}")

    # Capitalised variants.  ${x^} does the same as ${x@u} but is also
    # understood by bash versions older than 5.1.  This is done before
    # quoting, which may put a backslash in front.
    local word_cap replacement_cap
    word_cap=${word^}
    replacement_cap=${replacement^}

    # The dictionary text ends up inside a sed script, so quote whatever
    # sed would interpret: regexp metacharacters and the '%' delimiter in
    # the pattern; backslash, '&' and the delimiter in the replacement.
    local quote_re quote_repl
    quote_re='s/[][\\.*^$%]/\\&/g'
    quote_repl='s/[\\&%]/\\&/g'
    word=$(printf '%s\n' "$word" | sed "$quote_re")
    word_cap=$(printf '%s\n' "$word_cap" | sed "$quote_re")
    replacement=$(printf '%s\n' "$replacement" | sed "$quote_repl")
    replacement_cap=$(printf '%s\n' "$replacement_cap" | sed "$quote_repl")

    # \1 and \2 put back whatever delimited the word on either side.
    local repl1
    local repl2
    repl1="s%$before$word$after%\1$replacement\2%g"
    repl2="s%$before$word_cap$after%\1$replacement_cap\2%g"

    sed -i \
        -e "$repl1;$repl2" \
        -- "$file"
}
# Apply one dictionary rule to a set of files and report the outcome.
#
# Arguments: $1      - word to replace
#            $2      - replacement
#            $3 ...  - candidate files
# Outputs:   nothing if no file contains the word;
#            "TODO: <word> -> <replacement>" if the replacement contains a
#            comma, in which case no file is touched;
#            otherwise "<word> -> <replacement>" if at least one file was
#            changed, followed by one "TODO: ...: replacement failed: <file>"
#            line for each file that still contains the word.
replace_word_in_files ()
{
    local word
    word="$1"

    local replacement
    replacement="$2"

    shift 2

    local id
    id="$word -> $replacement"

    # Reduce set of files for sed to operate on.
    local -a files_matching_word
    mapfile -t files_matching_word \
            < <(find_files_matching_word "$word" "$@")

    if [ ${#files_matching_word[@]} -eq 0 ]; then
        return
    fi

    # A replacement containing a comma is left for manual handling.
    if [[ $replacement == *,* ]]; then
        printf 'TODO: %s\n' "$id"
        return
    fi

    # Compare checksums taken before and after to find out whether sed
    # actually changed anything.
    local changed f before after
    changed=false
    for f in "${files_matching_word[@]}"; do
        before=$(md5sum "$f")

        replace_word_in_file \
            "$word" \
            "$replacement" \
            "$f"

        after=$(md5sum "$f")

        if [ "$after" != "$before" ]; then
            changed=true
        fi
    done

    if $changed; then
        printf '%s\n' "$id"
    fi

    # Report files in which the word can still be found.  The rule text is
    # passed to printf as data: it comes from the dictionaries and may
    # contain quotes or percent signs.
    local failed
    while IFS= read -r failed; do
        printf 'TODO: %s: replacement failed: %s\n' "$id" "$failed"
    done < <(find_files_matching_word "$word" "${files_matching_word[@]}")
}
# Script entry point.
#
# Arguments: $@ - the command-line arguments, handed to parse_args
#
# With --print-dictionary the parsed dictionary is printed and the script
# exits with status 0.  With --check the script exits with status 1 as soon
# as any file is found to contain a dictionary word, without changing
# anything.  Otherwise every rule is applied to the files containing
# dictionary words.
main ()
{
    # Set by parse_args.
    declare -a unique_files
    check=false
    print_dictionary=false
    parse_args "$@"

    get_dictionary

    # Set by parse_dictionary.
    declare -a words replacements
    parse_dictionary

    if $print_dictionary; then
        print_dictionary
        exit 0
    fi

    # Reduce set of files for sed to operate on.
    local -a candidates
    mapfile -t candidates \
            < <(find_files_matching_words "${unique_files[@]}")

    # Nothing to report and nothing to fix.
    [ ${#candidates[@]} -eq 0 ] && return

    if $check; then
        exit 1
    fi

    local n word replacement
    n=0
    for word in "${words[@]}"; do
        replacement=${replacements[$n]}
        n=$((n + 1))

        # Empty slots are rules that parse_dictionary discarded.
        if [ "$word" != "" ]; then
            replace_word_in_files \
                "$word" \
                "$replacement" \
                "${candidates[@]}"
        fi
    done
}
# Run the script, forwarding all command-line arguments to main.
main "$@"