spellcheck.sh revision 1.1 1 1.1 christos #!/bin/bash
2 1.1 christos
3 1.1 christos # Copyright (C) 2024 Free Software Foundation, Inc.
4 1.1 christos # This program is free software; you can redistribute it and/or modify
5 1.1 christos # it under the terms of the GNU General Public License as published by
6 1.1 christos # the Free Software Foundation; either version 3 of the License, or
7 1.1 christos # (at your option) any later version.
8 1.1 christos #
9 1.1 christos # This program is distributed in the hope that it will be useful,
10 1.1 christos # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 1.1 christos # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 1.1 christos # GNU General Public License for more details.
13 1.1 christos #
14 1.1 christos # You should have received a copy of the GNU General Public License
15 1.1 christos # along with this program. If not, see <http://www.gnu.org/licenses/>.
16 1.1 christos
17 1.1 christos # Script to auto-correct common spelling mistakes.
18 1.1 christos #
19 1.1 christos # Example usage:
20 1.1 christos # $ ./gdb/contrib/spellcheck.sh gdb*
21 1.1 christos
# Physical (symlink-free) directory holding this script, and the script's
# own absolute path.
scriptdir=$(
    cd "$(dirname "$0")" || exit
    pwd -P
)
this_script="$scriptdir/$(basename "$0")"

# Web page the downloaded dictionary is extracted from.
url="https://en.wikipedia.org/wiki/Wikipedia:Lists_of_common_misspellings/For_machines"

# Cached files live in the .git directory two levels above the script.
cache_dir="$scriptdir/../../.git"
cache_file="wikipedia-common-misspellings.txt"
cache_file2="spell-check.pat1"

# Downloaded dictionary (cached) and the dictionary shipped next to the
# script.
dictionary="$cache_dir/$cache_file"
local_dictionary="$scriptdir/common-misspellings.txt"
31 1.1 christos
# Succeed if the running bash is at least version MAJOR.MINOR.
#
# Arguments: $1 - required major version
#            $2 - required minor version
# Globals:   bash_major, bash_minor - filled in on first use from
#            BASH_VERSINFO and reused by later calls.
# Returns:   0 if the running version is new enough, non-zero otherwise.
bash_version_at_least ()
{
    local major
    major="$1"
    local minor
    minor="$2"

    if [ "$bash_major" = "" ]; then
        # BASH_VERSINFO already holds the numeric components, so there is
        # no need to parse BASH_VERSION with an external tool.
        bash_major=${BASH_VERSINFO[0]}
        bash_minor=${BASH_VERSINFO[1]}
    fi

    if [ "$bash_major" -lt "$major" ]; then
        # Major version less than required, return false.
        return 1
    fi

    if [ "$bash_major" -gt "$major" ]; then
        # Major version more than required, return true.
        return 0
    fi

    # Same major version: the minor version decides.
    [ "$bash_minor" -ge "$minor" ]
}
57 1.1 christos
# Separators: space, slash, tab, colon, comma, double quote.
# The grep list holds the literal characters (the tab is written as $'\t'
# so it cannot be confused with a space); the sed list spells the tab as
# the two-character escape \t.
declare -a grep_separators
grep_separators=(
    " "
    "/"
    $'\t'
    ":"
    ","
    "\""
)
declare -a sed_separators
sed_separators=(
    " "
    "/"
    "\t"
    ":"
    ","
    "\""
)
77 1.1 christos
# Pre: start of line, left parenthesis.
# The grep list escapes the parenthesis, the sed list leaves it bare.
declare -a grep_pre=('^' '\(')
declare -a sed_pre=('^' '(')
89 1.1 christos
# Post: dot, right parenthesis, end of line.
# The grep list escapes the parenthesis, the sed list leaves it bare.
declare -a grep_post=('\.' '\)' '$')
declare -a sed_post=('\.' ')' '$')
103 1.1 christos
# Join items with a separator and print the result on stdout.
#
# Arguments: $1  - separator placed between items (may be several
#                  characters long, e.g. "\|")
#            $2… - items to join; empty items are kept
# Outputs:   the joined string followed by a newline.
join ()
{
    local or
    or="$1"
    shift

    local res
    res=""

    # Seed the result with the first item so that no separator precedes
    # it, even when that item is the empty string.
    if [ $# -gt 0 ]; then
        res="$1"
        shift
    fi

    local item
    for item in "$@"; do
        res="$res$or$item"
    done

    # printf rather than echo: a result such as "-n" must be printed, not
    # interpreted as an option.
    printf '%s\n' "$res"
}
127 1.1 christos
# Alternation operator used when joining items for grep and for sed.
grep_or='|'
sed_or='\|'
130 1.1 christos
# Print the arguments as one parenthesised alternation: (a|b|c).
#
# Globals:   grep_or (read) - alternation operator.
# Arguments: the alternatives to combine.
grep_join ()
{
    local res
    # The separator is quoted so it reaches join as exactly one argument,
    # whatever IFS is set to.
    res=$(join "$grep_or" "$@")
    echo "($res)"
}
137 1.1 christos
# Print the arguments as one alternation group using backslash-escaped
# grouping and alternation: \(a\|b\|c\).
#
# Globals:   sed_or (read) - alternation operator.
# Arguments: the alternatives to combine.
sed_join ()
{
    local res
    # The separator is quoted so it reaches join as exactly one argument,
    # whatever IFS is set to.
    res=$(join "$sed_or" "$@")
    echo "\($res\)"
}
144 1.1 christos
# Print the two supported invocation forms on stdout.
usage ()
{
    local prog
    prog=$(basename "$0")

    echo "usage: $prog [--check] <file|dir>+"
    echo " $prog --print-dictionary"
}
150 1.1 christos
# Print $1 unchanged if it starts with a slash; otherwise print it
# prefixed with the physical current working directory.
make_absolute ()
{
    local path="$1"

    if [[ "$path" != /* ]]; then
        path="$(pwd -P)/$path"
    fi

    echo "$path"
}
166 1.1 christos
# Parse the command line.
#
# Accepted forms:
#   --print-dictionary     (as the only argument) sets print_dictionary
#   [--check]... <file|dir>+
#
# Globals written: print_dictionary, check, unique_files (array of
#                  resolved file names, sorted, duplicates removed, names
#                  containing "ChangeLog" dropped).
# Exits with status 1 after printing the usage when no file or directory
# is given, or when an argument is neither a file nor a directory.
parse_args ()
{
    if [ $# -eq 1 ] && [ "$1" = "--print-dictionary" ]; then
        print_dictionary=true
        return
    fi

    while true; do
        case " $1 " in
            " --check ")
                check=true
                shift
                ;;
            *)
                break
                ;;
        esac
    done

    if [ $# -eq 0 ]; then
        usage
        exit 1
    fi

    # Create the scratch file only once it is known to be needed, so the
    # early returns above cannot leave it behind.
    local files
    files=$(mktemp) || exit 1
    trap 'rm -f "$files"' EXIT

    local arg
    for arg in "$@"; do
        if [ -f "$arg" ]; then
            arg=$(make_absolute "$arg")
            readlink -e "$arg" \
                     >> "$files"
        elif [ -d "$arg" ]; then
            arg=$(make_absolute "$arg")
            # "{} +" hands many files to each readlink invocation instead
            # of starting one process per file.
            find "$arg" -type f -exec readlink -e {} + \
                 >> "$files"
        else
            echo "Not a file or directory: $arg"
            exit 1
        fi
    done

    mapfile -t unique_files \
            < <(sort -u "$files" \
                    | grep -v ChangeLog)

    rm -f "$files"
    trap - EXIT
}
219 1.1 christos
# Make sure the downloaded dictionary exists in the cache.
#
# Does nothing if $dictionary is already present.  Otherwise downloads
# $url and stores the non-empty lines found between <pre> and </pre> in
# $dictionary.
#
# Globals:  url, dictionary (read).
# Returns:  0 on success or when the cache already exists; non-zero if
#           the download failed or produced no entries.  In that case no
#           dictionary file is left behind, so a later run tries again
#           instead of treating an empty file as a valid cache.
get_dictionary ()
{
    if [ -f "$dictionary" ]; then
        return
    fi

    local webpage
    webpage=$(mktemp) || return
    trap 'rm -f "$webpage"' EXIT

    local status
    status=0

    # Download web page containing table.
    if wget "$url" -O "$webpage"; then
        # Extract table from web page.
        awk '/<pre>/,/<\/pre>/' "$webpage" \
            | sed 's/<pre>//;s/<\/pre>//' \
            | grep -E -v "^$" \
                   > "$dictionary"

        if [ ! -s "$dictionary" ]; then
            echo "No dictionary entries found in $url" >&2
            rm -f "$dictionary"
            status=1
        fi
    else
        echo "Failed to download $url" >&2
        status=1
    fi

    rm -f "$webpage"
    trap - EXIT

    return "$status"
}
242 1.1 christos
# Print the local dictionary, leaving out empty lines and lines that
# start with '#'.
output_local_dictionary ()
{
    grep --extended-regexp --invert-match "^#|^$" "$local_dictionary"
}
250 1.1 christos
# Print the local dictionary followed by the downloaded one, dropping
# every line that contains an upper-case letter.
output_dictionaries ()
{
    {
        output_local_dictionary
        cat "$dictionary"
    } | grep -E -v "[A-Z]"
}
258 1.1 christos
# Load the "word->replacement" rules into the parallel arrays words and
# replacements, then blank out (set to the empty string, keeping the
# index) every rule that must not be applied:
#   - a word that already appeared earlier in the list, so that the local
#     dictionary, which comes first, overrides the wiki dictionary;
#   - identity rules, where word and replacement are equal.
parse_dictionary ()
{
    mapfile -t words \
            < <(output_dictionaries | awk -F '->' '{print $1}')
    mapfile -t replacements \
            < <(output_dictionaries | awk -F '->' '{print $2}')

    local -A seen
    local pos entry drop
    pos=0
    for entry in "${words[@]}"; do
        drop=false

        if [ "${seen[$entry]}" == 1 ]; then
            # Already handled by an earlier rule.
            drop=true
        else
            seen[$entry]=1
            if [ "$entry" = "${replacements[pos]}" ]; then
                # Identity rule.
                drop=true
            fi
        fi

        if $drop; then
            words[pos]=""
            replacements[pos]=""
        fi

        pos=$((pos + 1))
    done
}
293 1.1 christos
# Print every rule whose word is not blank as "word -> replacement", one
# per line.
print_dictionary ()
{
    local pos entry
    pos=0
    for entry in "${words[@]}"; do
        if [ "$entry" != "" ]; then
            echo "$entry -> ${replacements[pos]}"
        fi
        pos=$((pos + 1))
    done
}
309 1.1 christos
# Print the names of the given files that contain at least one dictionary
# word, in lower-case or capitalized form, delimited by the configured
# pre/separator/post patterns.
#
# The combined pattern is cached in $cache_dir under a name derived from
# the md5sum of both dictionaries and this script, so it is rebuilt only
# when one of those changes.
#
# Arguments: the files to search.  With no files nothing is printed;
#            without this guard grep would read standard input instead.
# Globals:   words, local_dictionary, dictionary, this_script, cache_dir,
#            cache_file2, grep_pre, grep_separators, grep_post (read).
find_files_matching_words ()
{
    if [ $# -eq 0 ]; then
        return
    fi

    local cache_id
    cache_id=$(cat "$local_dictionary" "$dictionary" "$this_script" \
                   | md5sum \
                   | awk '{print $1}')

    local patfile
    patfile="$cache_dir/$cache_file2".$cache_id

    local pat
    if [ -f "$patfile" ]; then
        pat=$(cat "$patfile")
    else
        # Drop patterns cached for other inputs.
        rm -f "$cache_dir/$cache_file2".*

        # Skip blanked-out words and turn the first character of each
        # remaining word into a bracket expression matching either case,
        # e.g. teh -> [Tt]eh.
        local -a re_words
        mapfile -t re_words \
                < <(printf '%s\n' "${words[@]}" \
                        | sed -e '/^$/d' -e 's/^\(.\)/[\u\1\1]/')

        pat=$(grep_join "${re_words[@]}")

        local before after
        before=$(grep_join \
                     "${grep_pre[@]}" \
                     "${grep_separators[@]}")
        after=$(grep_join \
                    "${grep_separators[@]}" \
                    "${grep_post[@]}")

        pat="$before$pat$after"

        echo "$pat" \
             > "$patfile"
    fi

    grep -E \
         -l \
         -e "$pat" \
         -- \
         "$@"
}
357 1.1 christos
# Print the names of the given files that contain one word, in the form
# given or with its first character upper-cased, delimited by the
# configured pre/separator/post patterns.
#
# Arguments: $1  - the word
#            $2… - the files to search.  With no files nothing is
#                  printed; without this guard grep would read standard
#                  input instead.
# Globals:   grep_pre, grep_separators, grep_post (read).
find_files_matching_word ()
{
    local pat
    pat="$1"
    shift

    if [ $# -eq 0 ]; then
        return
    fi

    local before after
    before=$(grep_join \
                 "${grep_pre[@]}" \
                 "${grep_separators[@]}")
    after=$(grep_join \
                "${grep_separators[@]}" \
                "${grep_post[@]}")

    # ${pat^} upper-cases the first character.  It is available in every
    # bash that provides the mapfile and associative-array features this
    # script already depends on, so no version check is needed.
    local patc
    patc=${pat^}
    pat="($patc|$pat)"

    pat="$before$pat$after"

    grep -E \
         -l \
         -e "$pat" \
         -- \
         "$@"
}
387 1.1 christos
# Replace a misspelled word in one file, in place.
#
# Both the word as given and its capitalized form are replaced (the
# latter by the capitalized replacement).  A match must be delimited by
# the configured pre/separator/post patterns, and those delimiters are
# kept in the output.
#
# Arguments: $1 - word to replace
#            $2 - replacement
#            $3 - file to edit
# Globals:   sed_pre, sed_separators, sed_post (read).
replace_word_in_file ()
{
    local word
    word="$1"

    local replacement
    replacement="$2"

    local file
    file="$3"

    local before after
    before=$(sed_join \
                 "${sed_pre[@]}" \
                 "${sed_separators[@]}")
    after=$(sed_join \
                "${sed_separators[@]}" \
                "${sed_post[@]}")

    # ${var^} upper-cases the first character.  It is available in every
    # bash that provides the mapfile and associative-array features this
    # script already depends on, so no version check is needed.
    local wordc replacementc
    wordc=${word^}
    replacementc=${replacement^}

    # \1 and \2 put back the delimiters matched before and after the word.
    local repl1
    local repl2
    repl1="s%$before$word$after%\1$replacement\2%g"
    repl2="s%$before$wordc$after%\1$replacementc\2%g"

    sed -i \
        "$repl1;$repl2" \
        "$file"
}
426 1.1 christos
# Apply one dictionary rule to a set of files and report the outcome on
# stdout.
#
# Output:
#   "<word> -> <replacement>"         at least one file was modified
#   "TODO: <rule>"                    the replacement contains a comma, so
#                                     the rule is reported, not applied
#   "TODO: <rule>: replacement failed: <file>"
#                                     the word is still found in <file>
#                                     after the replacement
# Nothing is printed when no file contains the word.
#
# Arguments: $1  - word to replace
#            $2  - replacement
#            $3… - candidate files
replace_word_in_files ()
{
    local word
    word="$1"

    local replacement
    replacement="$2"

    shift 2

    local id
    id="$word -> $replacement"

    # Reduce set of files for sed to operate on.
    local -a files_matching_word
    mapfile -t files_matching_word \
            < <(find_files_matching_word "$word" "$@")

    if [ ${#files_matching_word[@]} -eq 0 ]; then
        return
    fi

    if [[ "$replacement" == *,* ]]; then
        echo "TODO: $id"
        return
    fi

    # Compare checksums around each edit to learn whether anything was
    # actually modified.
    local changed f before after
    changed=false
    for f in "${files_matching_word[@]}"; do
        before=$(md5sum "$f")

        replace_word_in_file \
            "$word" \
            "$replacement" \
            "$f"

        after=$(md5sum "$f")

        if [ "$after" != "$before" ]; then
            changed=true
        fi
    done

    if $changed; then
        printf '%s\n' "$id"
    fi

    # The rule text is passed to printf as data, never as part of the
    # format, so characters such as % or " in a dictionary entry cannot
    # corrupt the report.
    find_files_matching_word "$word" "${files_matching_word[@]}" \
        | while IFS= read -r f; do
              printf 'TODO: %s: replacement failed: %s\n' "$id" "$f"
          done
}
486 1.1 christos
# Program flow: parse the command line, make sure the dictionary is
# available, load the rules, then either print them, or look for files
# containing misspellings and (unless --check was given) fix them.
#
# Exit status: 0 after --print-dictionary; 1 in --check mode when at
# least one file contains a dictionary word.
main ()
{
    local -a unique_files
    check=false
    print_dictionary=false
    parse_args "$@"

    get_dictionary

    local -a words
    local -a replacements
    parse_dictionary

    if $print_dictionary; then
        print_dictionary
        exit 0
    fi

    # Narrow the work down to files that contain at least one dictionary
    # word.
    local -a files_matching_words
    mapfile -t files_matching_words \
            < <(find_files_matching_words "${unique_files[@]}")

    if (( ${#files_matching_words[@]} == 0 )); then
        return
    fi

    if $check; then
        exit 1
    fi

    # Apply every rule that was not blanked out.
    local pos entry correction
    pos=0
    for entry in "${words[@]}"; do
        correction=${replacements[pos]}
        pos=$((pos + 1))

        if [ "$entry" != "" ]; then
            replace_word_in_files \
                "$entry" \
                "$correction" \
                "${files_matching_words[@]}"
        fi
    done
}
535 1.1 christos
# Script entry point: pass every command-line argument to main unchanged.
main "$@"
537