#!/bin/sh

# Copyright (C) 2019-2023 Free Software Foundation, Inc.
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

# This script is intended to facilitate spell checking of source/doc files.
# It:
# - transforms the files into a list of lowercase words
# - prefixes each word with its frequency
# - keeps only the words whose frequency lies within a given range
# - sorts the words, longest first
#
# If '-c' is passed as option, it operates on the C comments only, rather than
# on the entire file.
#
# The frequency range is selected with '-f N' (or '--freq N') for exactly N
# occurrences, or with '--min N' and/or '--max N'.  A bound of 0 means "no
# bound"; without any of these options every word is listed.
#
# For:
# ...
# $ files=$(find gdb -type f \( -name "*.c" -o -name "*.h" \))
# $ ./gdb/contrib/words.sh -c $files
# ...
# it generates a list of ~15000 words prefixed with frequency.
#
# This could be used to generate a dictionary that is kept as part of the
# sources, against which new code can be checked, generating a warning or
# error. The hope is that misspellings would trigger this frequently, and rare
# words rarely, otherwise the burden of updating the dictionary would be too
# much.
#
# And for:
# ...
# $ files=$(find gdb -type f \( -name "*.c" -o -name "*.h" \))
# $ ./gdb/contrib/words.sh -c -f 1 $files
# ...
# it generates a list of ~5000 words with frequency 1.
#
# This can be used to scan for misspellings manually.
#

# Report a command-line usage error on stderr and exit with status 2.
usage_error ()
{
    printf '%s: %s\n' "${0##*/}" "$*" >&2
    exit 2
}

# Validate the value of a frequency option.  Call it with the remaining
# command line ("$@"), so that $1 is the option itself and $2, if present,
# is its value.  The value must be a non-empty string of decimal digits;
# anything else is rejected here because it would otherwise make 'shift 2'
# fail or end up in the awk frequency filter.
check_freq_arg ()
{
    if [ $# -lt 2 ]; then
        usage_error "option '$1' requires an argument"
    fi
    case "$2" in
        '' | *[!0-9]*)
            usage_error "invalid frequency '$2' for option '$1'"
            ;;
    esac
}

# Option state:
#   minfreq/maxfreq  inclusive frequency bounds; 0 means "no bound"
#   c                'true' when only C comments should be scanned
minfreq=
maxfreq=
c=false
while [ $# -gt 0 ]; do
    case "$1" in
        -c)
            c=true
            shift
            ;;
        --freq | -f)
            # Exact frequency: both bounds are the same value.
            check_freq_arg "$@"
            minfreq=$2
            maxfreq=$2
            shift 2
            ;;
        --min)
            check_freq_arg "$@"
            minfreq=$2
            # Leave the upper bound open unless --max was given earlier.
            if [ "$maxfreq" = "" ]; then
                maxfreq=0
            fi
            shift 2
            ;;
        --max)
            check_freq_arg "$@"
            maxfreq=$2
            # Leave the lower bound open unless --min was given earlier.
            if [ "$minfreq" = "" ]; then
                minfreq=0
            fi
            shift 2
            ;;
        --)
            # Explicit end of options; everything after it is a file name.
            shift
            break
            ;;
        *)
            # First non-option argument: the rest are file names.
            break
            ;;
    esac
done

# No frequency option at all: do not filter.
if [ "$minfreq" = "" ] && [ "$maxfreq" = "" ]; then
    minfreq=0
    maxfreq=0
fi

# Temporary file holding the awk program that extracts the text of C
# comments (/* ... */).  It is passed to 'awk -f' when -c is given and is
# removed when the script exits.
awkfile=$(mktemp) || {
    echo "${0##*/}: cannot create temporary file" >&2
    exit 1
}
trap 'rm -f "$awkfile"' EXIT

# The here-document delimiter is quoted so that the awk program is written
# literally, without shell expansion.
cat > "$awkfile" <<'EOF'
# Print, for every input line that contains comment text, the text found
# inside /* ... */ comments.  in_comment carries the "inside a comment"
# state from one line to the next.
BEGIN {
    in_comment = 0
}

{
    rest = $0
    text = ""
    # A line is printed if any part of it lies inside a comment.
    touched = in_comment

    # Scan the line left to right so that several comments on one line,
    # or a comment that closes and another that opens on the same line,
    # are all handled.
    while (length(rest) > 0) {
        if (in_comment) {
            pos = index(rest, "*/")
            if (pos == 0) {
                # Comment continues on the next line.
                text = text rest
                rest = ""
            } else {
                # Keep a separator so adjacent comments do not fuse.
                text = text substr(rest, 1, pos - 1) " "
                rest = substr(rest, pos + 2)
                in_comment = 0
            }
        } else {
            pos = index(rest, "/*")
            if (pos == 0) {
                # Only code left on this line.
                rest = ""
            } else {
                rest = substr(rest, pos + 2)
                in_comment = 1
                touched = 1
            }
        }
    }

    if (touched) {
        print text
    }
}
EOF

# Stabilize sort.
export LC_ALL=C

# Read text on stdin and write one lowercase word per line: punctuation,
# brackets, quotes and runs of digits are turned into line breaks and any
# remaining blanks are dropped.
split_words ()
{
    sed \
        -e 's/[!"?;:%^$~#{}`&=@,. \t\/_()|<>\+\*-]/\n/g' \
        -e 's/\[/\n/g' \
        -e 's/\]/\n/g' \
        -e "s/'/\n/g" \
        -e 's/[0-9][0-9]*/\n/g' \
        -e 's/[ \t]*//g' \
        | tr '[:upper:]' '[:lower:]'
}

# Read one word per line and write each distinct word prefixed with its
# number of occurrences (the 'uniq -c' format).
count_words ()
{
    sort | uniq -c
}

# Keep the 'uniq -c' lines whose count is within [$1, $2].  A bound of 0
# means "no bound".  The bounds are handed to awk with -v rather than being
# pasted into the program text, so a malformed value cannot alter the awk
# program; '+ 0' forces a numeric comparison.
filter_by_frequency ()
{
    awk -v min="$1" -v max="$2" '
        (min + 0 == 0 || min + 0 <= $1) && (max + 0 == 0 || $1 <= max + 0) {
            print $0
        }'
}

# Sort lines by decreasing length: prefix each line with its length, sort
# numerically in reverse, then strip the prefix again.
sort_longest_first ()
{
    awk '{ print length($0) " " $0; }' \
        | sort -n -r \
        | cut -d ' ' -f 2-
}

# With -c only the text of C comments is scanned, otherwise the whole input.
if $c; then
    awk \
        -f "$awkfile" \
        -- "$@"
else
    cat "$@"
fi \
    | split_words \
    | count_words \
    | filter_by_frequency "$minfreq" "$maxfreq" \
    | sort_longest_first