#!/bin/bash

# Copyright (C) 2024 Free Software Foundation, Inc.
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

# Script to auto-correct common spelling mistakes.
#
# Example usage:
# $ ./gdb/contrib/spellcheck.sh gdb*
#
# Modes:
#   [--check] <file|dir>+   Fix misspellings in the given files in place.
#                           With --check nothing is modified and the exit
#                           status is 1 if any file contains a misspelling.
#   --print-dictionary      Print the effective "word -> replacement" rules.
#
# Two dictionaries are combined: a local one next to this script, and one
# downloaded from $url and cached in $cache_dir.  Rules from the local
# dictionary take precedence.

scriptdir=$(cd "$(dirname "$0")" || exit; pwd -P)
this_script=$scriptdir/$(basename "$0")

url=https://en.wikipedia.org/wiki/Wikipedia:Lists_of_common_misspellings/For_machines
cache_dir=$scriptdir/../../.git
cache_file=wikipedia-common-misspellings.txt
dictionary=$cache_dir/$cache_file
local_dictionary=$scriptdir/common-misspellings.txt
cache_file2=spell-check.pat1

# Temporary files to remove when the script exits.  Kept in a global so
# that the EXIT trap can still see them after the creating function has
# returned.
declare -a tmpfiles
tmpfiles=()

# Remove all registered temporary files.  Installed as EXIT trap by main.
cleanup_tmpfiles ()
{
    if [ ${#tmpfiles[@]} -gt 0 ]; then
        rm -f -- "${tmpfiles[@]}"
    fi
}

# Return true if the running bash is at least version $1.$2.
#   $1 - required major version
#   $2 - required minor version
bash_version_at_least ()
{
    local major
    major="$1"
    local minor
    minor="$2"

    if [ "${BASH_VERSINFO[0]}" -lt "$major" ]; then
        # Major version less than required, return false.
        return 1
    fi

    if [ "${BASH_VERSINFO[0]}" -gt "$major" ]; then
        # Major version more than required, return true.
        return 0
    fi

    # Check minor version.
    [ "${BASH_VERSINFO[1]}" -ge "$minor" ]
}

# Separators: space, slash, tab, colon, comma, double quote.
declare -a grep_separators
grep_separators=(
    " "
    "/"
    $'\t'
    ":"
    ","
    "\""
)
declare -a sed_separators
sed_separators=(
    " "
    "/"
    "\t"
    ":"
    ","
    "\""
)

# Pre: start of line, left parenthesis.
declare -a grep_pre
grep_pre=(
    "^"
    "\("
)
declare -a sed_pre
sed_pre=(
    "^"
    "("
)

# Post: dot, right parenthesis, end of line.
declare -a grep_post
grep_post=(
    "\."
    "\)"
    "$"
)
declare -a sed_post
sed_post=(
    "\."
    ")"
    "$"
)

# Print the arguments $2... joined by the separator string $1.
join ()
{
    local or
    or="$1"
    shift

    local res
    res=""

    local first
    first=true

    local item
    for item in "$@"; do
        if $first; then
            first=false
            res="$item"
        else
            res="$res$or$item"
        fi
    done

    printf '%s\n' "$res"
}

grep_or="|"
sed_or="\|"

# Print an ERE group matching any of the arguments: (a|b|c).
grep_join ()
{
    local res
    res=$(join "$grep_or" "$@")
    printf '(%s)\n' "$res"
}

# Print a BRE group matching any of the arguments: \(a\|b\|c\).
sed_join ()
{
    local res
    res=$(join "$sed_or" "$@")
    printf '\\(%s\\)\n' "$res"
}

# Regexps for what may precede / follow a word.  These do not depend on
# the word, so build them once rather than for every dictionary entry.
grep_before=$(grep_join "${grep_pre[@]}" "${grep_separators[@]}")
grep_after=$(grep_join "${grep_separators[@]}" "${grep_post[@]}")
sed_before=$(sed_join "${sed_pre[@]}" "${sed_separators[@]}")
sed_after=$(sed_join "${sed_separators[@]}" "${sed_post[@]}")

# Print a usage message.
usage ()
{
    local prog
    prog=$(basename "$0")

    printf 'usage: %s [--check] <file|dir>+\n' "$prog"
    printf '       %s --print-dictionary\n' "$prog"
}

# Print path $1, prefixed with the current directory if it is relative.
make_absolute ()
{
    local arg
    arg="$1"

    case "$arg" in
        /*)
            ;;
        *)
            arg=$(pwd -P)/"$arg"
            ;;
    esac

    printf '%s\n' "$arg"
}

# Parse the command line.
# Sets the globals print_dictionary and check, and fills unique_files with
# the canonical paths of all files to process (ChangeLog files excluded).
# Exits with status 1 on a usage error.
parse_args ()
{
    if [ $# -eq 1 ] && [ "$1" = "--print-dictionary" ]; then
        print_dictionary=true
        return
    fi

    while true; do
        case "$1" in
            --check)
                check=true
                shift
                ;;
            *)
                break
                ;;
        esac
    done

    if [ $# -eq 0 ]; then
        usage >&2
        exit 1
    fi

    # Only create the temporary file once we know we need it, and register
    # it for removal by the EXIT trap.
    local files
    files=$(mktemp) || exit 1
    tmpfiles+=("$files")

    local arg
    for arg in "$@"; do
        if [ -f "$arg" ]; then
            arg=$(make_absolute "$arg")
            readlink -e "$arg" \
                     >> "$files"
        elif [ -d "$arg" ]; then
            arg=$(make_absolute "$arg")
            find "$arg" -type f -exec readlink -e {} + \
                 >> "$files"
        else
            echo "Not a file or directory: $arg" >&2
            exit 1
        fi
    done

    mapfile -t unique_files \
            < <(sort -u "$files" \
                    | grep -v ChangeLog)

    rm -f -- "$files"
}

# Make sure the downloaded dictionary is present in the cache, fetching it
# from $url if necessary.  Exits with status 1 if the download fails or
# yields no rules, rather than caching an empty dictionary.
get_dictionary ()
{
    if [ -f "$dictionary" ]; then
        return
    fi

    local webpage table
    webpage=$(mktemp) || exit 1
    tmpfiles+=("$webpage")
    table=$(mktemp) || exit 1
    tmpfiles+=("$table")

    # Download web page containing table.
    if ! wget "$url" -O "$webpage"; then
        echo "Failed to download $url" >&2
        exit 1
    fi

    # Extract table from web page.
    awk '/<pre>/,/<\/pre>/' "$webpage" \
        | sed 's/<pre>//;s/<\/pre>//' \
        | grep -E -v "^$" \
               > "$table"

    if [ ! -s "$table" ]; then
        echo "No dictionary found in $url" >&2
        exit 1
    fi

    # Only populate the cache once we have a usable table.
    cat "$table" > "$dictionary"

    rm -f -- "$webpage" "$table"
}

# Print the rules of the local dictionary.
output_local_dictionary ()
{
    # Filter out comments and empty lines.
    grep -E -v \
         "^#|^$" \
         "$local_dictionary"
}

# Print the rules of both dictionaries, local one first, skipping all
# lines that contain an uppercase letter.
output_dictionaries ()
{
    (
        output_local_dictionary
        cat "$dictionary"
    ) | grep -E -v "[A-Z]"
}

# Fill the parallel arrays words and replacements from the dictionaries.
# Entries that should not be applied are kept, to preserve the pairing of
# the two arrays, but set to the empty string.
parse_dictionary ()
{
    # Parse dictionary.
    mapfile -t words \
            < <(awk -F '->' '{print $1}' <(output_dictionaries))
    mapfile -t replacements \
            < <(awk -F '->' '{print $2}' <(output_dictionaries))

    local -A words_done
    local i word replacement drop
    i=0
    for word in "${words[@]}"; do
        replacement=${replacements[i]}
        drop=false

        if [ "$word" = "" ] || [ "${words_done[$word]}" = 1 ]; then
            # Skip malformed rules, and words that are already handled.
            # The latter ensures that the local dictionary overrides the
            # wiki dictionary.
            drop=true
        else
            words_done[$word]=1

            # Skip identity rules.  The word is still marked as handled
            # above, so an identity rule in the local dictionary disables
            # the corresponding wiki rule.
            if [ "$word" = "$replacement" ]; then
                drop=true
            fi
        fi

        if $drop; then
            words[i]=""
            replacements[i]=""
        fi

        i=$((i + 1))
    done
}

# Print all active rules as "word -> replacement", one per line.
print_dictionary ()
{
    local i word replacement
    i=0
    for word in "${words[@]}"; do
        replacement=${replacements[i]}
        i=$((i + 1))

        if [ "$word" = "" ]; then
            continue
        fi

        echo "$word -> $replacement"
    done
}

# Print those of the files $@ that contain at least one dictionary word.
# The combined regexp is cached in $cache_dir, keyed by a checksum of both
# dictionaries and this script.
find_files_matching_words ()
{
    local cache_id
    cache_id=$(cat "$local_dictionary" "$dictionary" "$this_script" \
                   | md5sum \
                   | awk '{print $1}')

    local patfile
    patfile="$cache_dir/$cache_file2".$cache_id

    local pat
    if [ -f "$patfile" ]; then
        pat=$(< "$patfile")
    else
        # Drop patterns cached for other dictionary / script versions.
        rm -f "$cache_dir/$cache_file2".*

        # Turn each word into a regexp that also matches the capitalized
        # form, for instance "teh" into "[Tt]eh".
        local -a re_words
        local w
        mapfile -t re_words \
                < <(for w in "${words[@]}"; do
                        if [ "$w" = "" ]; then
                            continue
                        fi
                        printf '%s\n' "$w"
                    done \
                        | sed 's/^\(.\)/[\u\1\1]/')

        pat=$(grep_join "${re_words[@]}")

        pat="$grep_before$pat$grep_after"

        printf '%s\n' "$pat" \
               > "$patfile"
    fi

    grep -E \
         -l \
         -e "$pat" \
         -- \
         "$@"
}

# Print those of the files $2... that contain word $1, either as is or
# with its first letter capitalized.
find_files_matching_word ()
{
    local pat
    pat="$1"
    shift

    local patc
    if bash_version_at_least 5 1; then
        patc=${pat@u}
    else
        # shellcheck disable=SC2001
        patc=$(echo "$pat" | sed 's/^\(.\)/\u\1/')
    fi
    pat="($patc|$pat)"

    pat="$grep_before$pat$grep_after"

    grep -E \
         -l \
         -e "$pat" \
         -- \
         "$@"
}

# Replace word $1 with $2 in file $3, in place.  The capitalized form of
# the word is replaced with the capitalized form of the replacement.
replace_word_in_file ()
{
    local word
    word="$1"

    local replacement
    replacement="$2"

    local file
    file="$3"

    local wordc replacementc
    if bash_version_at_least 5 1; then
        wordc=${word@u}
        replacementc=${replacement@u}
    else
        # shellcheck disable=SC2001
        wordc=$(echo "$word" | sed 's/^\(.\)/\u\1/')
        # shellcheck disable=SC2001
        replacementc=$(echo "$replacement" | sed 's/^\(.\)/\u\1/')
    fi

    # \1 and \2 put back whatever preceded and followed the word.
    local repl1
    local repl2
    repl1="s%$sed_before$word$sed_after%\1$replacement\2%g"
    repl2="s%$sed_before$wordc$sed_after%\1$replacementc\2%g"

    sed -i \
        "$repl1;$repl2" \
        "$file"
}

# Replace word $1 with $2 in those of the files $3... that contain it.
# Prints "word -> replacement" if any file changed, and a TODO line for
# rules that cannot be applied automatically or left matches behind.
replace_word_in_files ()
{
    local word
    word="$1"

    local replacement
    replacement="$2"

    shift 2

    local id
    id="$word -> $replacement"

    # Reduce set of files for sed to operate on.
    local -a files_matching_word
    mapfile -t files_matching_word \
            < <(find_files_matching_word "$word" "$@")

    if [ ${#files_matching_word[@]} -eq 0 ]; then
        return
    fi

    # A comma means there are several candidate replacements, so leave the
    # choice to the user.
    case "$replacement" in
        *,*)
            echo "TODO: $id"
            return
            ;;
    esac

    local changed f before after
    changed=false
    for f in "${files_matching_word[@]}"; do
        before=$(md5sum "$f")

        replace_word_in_file \
            "$word" \
            "$replacement" \
            "$f"

        after=$(md5sum "$f")

        if [ "$after" != "$before" ]; then
            changed=true
        fi
    done

    if $changed; then
        echo "$id"
    fi

    # Report files that still contain the word after the replacement.
    local failed
    find_files_matching_word "$word" "${files_matching_word[@]}" \
        | while IFS= read -r failed; do
              printf 'TODO: %s: replacement failed: %s\n' "$id" "$failed"
          done
}

# Entry point.  See the comment at the top of the file for the supported
# command line.
main ()
{
    local -a unique_files=()
    check=false
    print_dictionary=false

    trap cleanup_tmpfiles EXIT

    parse_args "$@"

    get_dictionary

    declare -a words
    declare -a replacements
    parse_dictionary

    if $print_dictionary; then
        print_dictionary
        exit 0
    fi

    # Without any file to scan grep would read from stdin instead.
    if [ ${#unique_files[@]} -eq 0 ]; then
        return
    fi

    # Reduce set of files for sed to operate on.
    local -a files_matching_words
    mapfile -t files_matching_words \
            < <(find_files_matching_words "${unique_files[@]}")

    if [ ${#files_matching_words[@]} -eq 0 ]; then
        return
    fi

    if $check; then
        exit 1
    fi

    local i word replacement
    i=0
    for word in "${words[@]}"; do
        replacement=${replacements[i]}
        i=$((i + 1))

        if [ "$word" = "" ]; then
            continue
        fi

        replace_word_in_files \
            "$word" \
            "$replacement" \
            "${files_matching_words[@]}"
    done
}

main "$@"