Home | History | Annotate | Line # | Download | only in contrib
      1  1.1  christos #!/bin/bash
      2  1.1  christos 
      3  1.1  christos # Copyright (C) 2024 Free Software Foundation, Inc.
      4  1.1  christos # This program is free software; you can redistribute it and/or modify
      5  1.1  christos # it under the terms of the GNU General Public License as published by
      6  1.1  christos # the Free Software Foundation; either version 3 of the License, or
      7  1.1  christos # (at your option) any later version.
      8  1.1  christos #
      9  1.1  christos # This program is distributed in the hope that it will be useful,
     10  1.1  christos # but WITHOUT ANY WARRANTY; without even the implied warranty of
     11  1.1  christos # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     12  1.1  christos # GNU General Public License for more details.
     13  1.1  christos #
     14  1.1  christos # You should have received a copy of the GNU General Public License
     15  1.1  christos # along with this program.  If not, see <http://www.gnu.org/licenses/>.
     16  1.1  christos 
     17  1.1  christos # Script to auto-correct common spelling mistakes.
     18  1.1  christos #
     19  1.1  christos # Example usage:
     20  1.1  christos # $ ./gdb/contrib/spellcheck.sh gdb*
     21  1.1  christos 
# Absolute, symlink-resolved directory holding this script, and the full
# path of the script itself.
scriptdir=$(
    cd "$(dirname "$0")" || exit
    pwd -P
)
this_script="$scriptdir/$(basename "$0")"

# Wikipedia page listing common misspellings in machine-readable form.
url=https://en.wikipedia.org/wiki/Wikipedia:Lists_of_common_misspellings/For_machines

# The word list extracted from $url is cached in $cache_dir.
cache_dir="$scriptdir/../../.git"
cache_file=wikipedia-common-misspellings.txt
dictionary="$cache_dir/$cache_file"

# Base name of the cached grep pattern, also kept in $cache_dir.
cache_file2=spell-check.pat1

# Word list maintained next to this script.
local_dictionary="$scriptdir/common-misspellings.txt"
     31  1.1  christos 
# Return success if the running bash is at least version MAJOR.MINOR.
#
# Arguments:
#   $1 - required major version
#   $2 - required minor version
bash_version_at_least ()
{
    local major
    major="$1"
    local minor
    minor="$2"

    # BASH_VERSINFO holds the version already split into its numeric
    # components, so $BASH_VERSION does not need to be parsed.
    if [ "${BASH_VERSINFO[0]}" -lt "$major" ]; then
	# Major version less than required, return false.
	return 1
    fi

    if [ "${BASH_VERSINFO[0]}" -gt "$major" ]; then
	# Major version more than required, return true.
	return 0
    fi

    # Same major version: the minor version decides.
    [ "${BASH_VERSINFO[1]}" -ge "$minor" ]
}
     57  1.1  christos 
# Each delimiter class is kept twice: the grep_* arrays are written for
# grep -E (extended regexps), the sed_* arrays for sed.

# Separators: space, slash, tab, colon, comma, double quote.
declare -a grep_separators=(' ' '/' $'\t' ':' ',' '"')
declare -a sed_separators=(' ' '/' '\t' ':' ',' '"')

# Pre: start of line, left parenthesis.
declare -a grep_pre=('^' '\(')
declare -a sed_pre=('^' '(')

# Post: dot, right parenthesis, end of line.
declare -a grep_post=('\.' '\)' '$')
declare -a sed_post=('\.' ')' '$')
    103  1.1  christos 
# Print the remaining arguments on one line, joined by the separator $1.
#
# Arguments:
#   $1    - separator string (may be longer than one character)
#   $2... - items to join
# Outputs: the joined string followed by a newline (an empty line when
#          there are no items).
join ()
{
    local or
    or="$1"
    shift

    local res
    res=""

    local first
    first=true

    # Keep the loop variable local so the caller's variables are untouched.
    local item
    for item in "$@"; do
	if $first; then
	    first=false
	    res="$item"
	else
	    res="$res$or$item"
	fi
    done

    # printf rather than echo: a result such as "-n" must be printed, not
    # taken as an option.
    printf '%s\n' "$res"
}
    127  1.1  christos 
# Alternation operators: "|" for grep -E, "\|" for sed.
grep_or="|"
sed_or="\|"

# Print the arguments as one grep -E alternation group: (a|b|c).
grep_join ()
{
    local res
    # Quote the separator so it reaches join as exactly one argument,
    # whatever IFS is set to.
    res=$(join "$grep_or" "$@")
    printf '%s\n' "($res)"
}

# Print the arguments as one sed alternation group: \(a\|b\|c\).
sed_join ()
{
    local res
    # Quote the separator so it reaches join as exactly one argument,
    # whatever IFS is set to.
    res=$(join "$sed_or" "$@")
    printf '%s\n' "\($res\)"
}
    144  1.1  christos 
# Print the two supported command-line forms to stdout.
usage ()
{
    local prog
    prog=$(basename "$0")

    echo "usage: $prog [--check] <file|dir>+"
    echo "       $prog --print-dictionary"
}
    150  1.1  christos 
# Print $1 as an absolute path: a path that already starts with "/" is
# printed unchanged, anything else is prefixed with the physical current
# working directory.
make_absolute ()
{
    local path
    path="$1"

    if [[ "$path" != /* ]]; then
	path=$(pwd -P)/"$path"
    fi

    echo "$path"
}
    166  1.1  christos 
# Parse the command line.
#
# Globals written:
#   print_dictionary - set to true for a lone --print-dictionary argument
#   check            - set to true when --check is given
#   unique_files     - sorted, de-duplicated list of the resolved files to
#                      process, with paths containing "ChangeLog" dropped
# Arguments: the script's command-line arguments.
# Exits with status 1 (after printing a message) when no file or directory
# is given, or when an argument is neither a file nor a directory.
parse_args ()
{
    if [ $# -eq 1 ] && [ "$1" = "--print-dictionary" ]; then
	print_dictionary=true
	return
    fi

    while true; do
	case " $1 " in
	    " --check ")
		check=true
		shift
		;;
	    *)
		break
		;;
	esac
    done

    if [ $# -eq 0 ]; then
	usage
	exit 1
    fi

    # Create the scratch file only once it is known to be needed.  The
    # EXIT trap can only see the local $files while this function is
    # active, so no path may return with the file still present.
    local files
    files=$(mktemp) || exit 1
    trap 'rm -f "$files"' EXIT

    local arg
    for arg in "$@"; do
	if [ -f "$arg" ]; then
	    arg=$(make_absolute "$arg")
	    readlink -e "$arg" \
		     >> "$files"
	elif [ -d "$arg" ]; then
	    arg=$(make_absolute "$arg")
	    find "$arg" -type f -exec readlink -e {} \; \
		 >> "$files"
	else
	    echo "Not a file or directory: $arg"
	    exit 1
	fi
    done

    mapfile -t unique_files \
	    < <(sort -u "$files" \
		    | grep -v ChangeLog)

    rm -f "$files"
    trap "" EXIT
}
    219  1.1  christos 
# Make sure the cached word list $dictionary exists, downloading it from
# $url and extracting the <pre>...</pre> table when it does not.
#
# Globals read: dictionary, url
# Exits with status 1 when the download fails or yields no table, so that
# an empty word list is never left behind as a valid-looking cache.
get_dictionary ()
{
    if [ -f "$dictionary" ]; then
	return
    fi

    local webpage
    webpage=$(mktemp) || exit 1
    trap 'rm -f "$webpage"' EXIT

    # Download web page containing table.
    if ! wget "$url" -O "$webpage"; then
	echo "Failed to download $url" >&2
	rm -f "$webpage"
	trap "" EXIT
	exit 1
    fi

    # Extract table from web page.  Write to a scratch file first so that
    # $dictionary only ever appears with complete contents.
    awk '/<pre>/,/<\/pre>/' "$webpage" \
	| sed 's/<pre>//;s/<\/pre>//' \
	| grep -E -v "^$" \
	       > "$dictionary.tmp"

    rm -f "$webpage"
    trap "" EXIT

    if [ ! -s "$dictionary.tmp" ]; then
	echo "No misspelling table found at $url" >&2
	rm -f "$dictionary.tmp"
	exit 1
    fi

    mv "$dictionary.tmp" "$dictionary"
}
    242  1.1  christos 
# Print the contents of $local_dictionary, leaving out lines that start
# with "#" and empty lines.
output_local_dictionary ()
{
    grep -E -v "^#|^$" "$local_dictionary"
}
    250  1.1  christos 
# Print the local dictionary followed by the cached $dictionary, dropping
# every line that contains an upper-case ASCII letter.
output_dictionaries ()
{
    {
	output_local_dictionary
	cat "$dictionary"
    } | grep -E -v "[A-Z]"
}
    258  1.1  christos 
# Fill the arrays words and replacements from the "word->replacement"
# lines printed by output_dictionaries.  Entries that must be ignored keep
# their slot but are blanked in both arrays:
#   - a word that already appeared earlier (the local dictionary is
#     printed first, so it overrides the wiki dictionary);
#   - a rule whose replacement equals the word.
parse_dictionary ()
{
    mapfile -t words \
	    < <(awk -F '->' '{print $1}' <(output_dictionaries))
    mapfile -t replacements \
	    < <(awk -F '->' '{print $2}' <(output_dictionaries))

    local -A seen
    local idx entry fix discard
    for ((idx = 0; idx < ${#words[@]}; idx++)); do
	entry=${words[idx]}
	fix=${replacements[idx]}
	discard=false

	if [ "${seen[$entry]}" == 1 ]; then
	    # Already handled by an earlier rule.
	    discard=true
	else
	    seen[$entry]=1
	    if [ "$entry" = "$fix" ]; then
		# Identity rule.
		discard=true
	    fi
	fi

	if $discard; then
	    words[idx]=""
	    replacements[idx]=""
	fi
    done
}
    293  1.1  christos 
# Print every non-blank entry of the words array together with the entry
# at the same index of replacements, one "word -> replacement" per line.
print_dictionary ()
{
    local idx entry
    idx=0
    for entry in "${words[@]}"; do
	if [ -n "$entry" ]; then
	    echo "$entry -> ${replacements[idx]}"
	fi
	idx=$((idx + 1))
    done
}
    309  1.1  christos 
# Print the files among the arguments that contain at least one word of
# the words array, delimited on both sides by a pre/separator/post
# character.  Each word is matched with a lower-case or upper-case first
# letter.
#
# The combined grep pattern is cached in $cache_dir, in a file whose name
# ends in the md5sum of both dictionaries plus this script; pattern files
# with any other suffix are removed when a new one is written.
#
# Arguments: the files to search.
find_files_matching_words ()
{
    local digest
    digest=$(cat "$local_dictionary" "$dictionary" "$this_script" \
		 | md5sum  \
		 | awk '{print $1}')

    local patfile
    patfile="$cache_dir/$cache_file2".$digest

    local pat
    if [ ! -f "$patfile" ]; then
	# No pattern cached for this dictionary/script state: drop stale
	# pattern files and build a fresh one.
	rm -f "$cache_dir/$cache_file2".*

	# Turn each non-blank word "xyz" into "[Xx]yz".
	local -a re_words
	mapfile -t re_words \
		< <(for f in "${words[@]}"; do
			if [ "$f" != "" ]; then
			    echo "$f"
			fi
		    done \
			| sed "s/^\(.\)/[\u\1\1]/")

	local lead trail
	lead=$(grep_join \
		   "${grep_pre[@]}" \
		   "${grep_separators[@]}")
	trail=$(grep_join \
		    "${grep_separators[@]}" \
		    "${grep_post[@]}")

	pat="$lead$(grep_join "${re_words[@]}")$trail"

	echo "$pat" \
	     > "$patfile"
    else
	pat=$(cat "$patfile")
    fi

    grep -E \
	-l \
	"$pat" \
	"$@"
}
    357  1.1  christos 
# Print the files that contain the word $1, written with either a
# lower-case or an upper-case first letter and delimited on both sides by
# a pre/separator/post character.
#
# Arguments:
#   $1    - word to look for (used as part of a grep -E pattern)
#   $2... - files to search
find_files_matching_word ()
{
    local pat
    pat="$1"
    shift

    local before after
    before=$(grep_join \
		 "${grep_pre[@]}" \
		 "${grep_separators[@]}")
    after=$(grep_join \
		"${grep_separators[@]}" \
		"${grep_post[@]}")

    # Capitalized variant of the word.  ${pat^} exists since bash 4.0,
    # which this script needs anyway for mapfile and associative arrays,
    # so no version check or sed fallback is required.
    local patc
    patc=${pat^}

    grep -E \
	-l \
	"$before($patc|$pat)$after" \
	"$@"
}
    387  1.1  christos 
# Replace, in place, every delimited occurrence of a word in one file.
# Both the word as given and the word with a capitalized first letter are
# replaced; the capitalized form gets the capitalized replacement.
#
# Arguments:
#   $1 - misspelled word
#   $2 - replacement
#   $3 - file to edit (modified with sed -i)
replace_word_in_file ()
{
    local word
    word="$1"

    local replacement
    replacement="$2"

    local file
    file="$3"

    local before after
    before=$(sed_join \
		 "${sed_pre[@]}" \
		 "${sed_separators[@]}")
    after=$(sed_join \
		"${sed_separators[@]}" \
		"${sed_post[@]}")

    # Capitalized variants.  ${var^} exists since bash 4.0, which this
    # script needs anyway for mapfile and associative arrays, so no
    # version check or sed fallback is required.
    local wordc replacementc
    wordc=${word^}
    replacementc=${replacement^}

    # \1 and \2 put back the delimiters matched around the word.
    local repl1
    local repl2
    repl1="s%$before$word$after%\1$replacement\2%g"
    repl2="s%$before$wordc$after%\1$replacementc\2%g"

    sed -i \
	"$repl1;$repl2" \
	"$file"
}
    426  1.1  christos 
# Apply one dictionary rule to a set of files and report the outcome on
# stdout:
#   "WORD -> REPLACEMENT"        when at least one file was changed;
#   "TODO: WORD -> REPLACEMENT"  when the replacement contains a comma and
#                                is therefore not applied;
#   "TODO: WORD -> REPLACEMENT: replacement failed: FILE"
#                                for each file that still matches the word
#                                afterwards.
# Nothing is printed when no file contains the word.
#
# Arguments:
#   $1    - misspelled word
#   $2    - replacement
#   $3... - candidate files
replace_word_in_files ()
{
    local word
    word="$1"

    local replacement
    replacement="$2"

    shift 2

    local id
    id="$word -> $replacement"

    # Reduce set of files for sed to operate on.
    local -a files_matching_word
    mapfile -t files_matching_word \
	    < <(find_files_matching_word "$word" "$@")

    if [ ${#files_matching_word[@]} -eq 0 ]; then
	return
    fi

    if [[ "$replacement" == *,* ]]; then
	echo "TODO: $id"
	return
    fi

    # Compare checksums around the edit to learn whether sed changed
    # anything.
    local changed f before after
    changed=false
    for f in "${files_matching_word[@]}"; do
	before=$(md5sum "$f")

	replace_word_in_file \
	    "$word" \
	    "$replacement" \
	    "$f"

	after=$(md5sum "$f")

	if [ "$after" != "$before" ]; then
	    changed=true
	fi
    done

    if $changed; then
	echo "$id"
    fi

    # Report files that still match.  The rule text is passed to printf
    # as data, so "%", quotes or backslashes in it are printed verbatim.
    local failed
    while IFS= read -r failed; do
	printf 'TODO: %s: replacement failed: %s\n' "$id" "$failed"
    done < <(find_files_matching_word "$word" "${files_matching_word[@]}")
}
    486  1.1  christos 
# Entry point: parse the command line, load the dictionaries, then either
# print the dictionary, check the files, or fix them.
#
# With --print-dictionary the parsed rules are printed and the script
# exits with status 0.  With --check the script exits with status 1 as
# soon as any file is found to contain a listed word, without modifying
# anything.  Otherwise every rule is applied to the matching files.
main ()
{
    # Filled in by parse_args.
    declare -a unique_files
    check=false
    print_dictionary=false
    parse_args "$@"

    get_dictionary

    # Filled in by parse_dictionary.
    declare -a words
    declare -a replacements
    parse_dictionary

    if $print_dictionary; then
	print_dictionary
	exit 0
    fi

    # Reduce set of files for sed to operate on.
    local -a files_matching_words
    mapfile -t files_matching_words \
	    < <(find_files_matching_words "${unique_files[@]}")

    # Nothing to report or fix.
    if [ ${#files_matching_words[@]} -eq 0 ]; then
	return
    fi

    if $check; then
	exit 1
    fi

    local idx
    for ((idx = 0; idx < ${#words[@]}; idx++)); do
	# Blank entries are rules that parse_dictionary discarded.
	if [ "${words[idx]}" = "" ]; then
	    continue
	fi

	replace_word_in_files \
	    "${words[idx]}" \
	    "${replacements[idx]}" \
	    "${files_matching_words[@]}"
    done
}
    535  1.1  christos 
    536  1.1  christos main "$@"
    537