Home | History | Annotate | Line # | Download | only in contrib
words.sh revision 1.1.1.1.2.1
      1          1.1  christos #!/bin/sh
      2          1.1  christos 
      3  1.1.1.1.2.1  perseant # Copyright (C) 2019-2023 Free Software Foundation, Inc.
      4          1.1  christos # This program is free software; you can redistribute it and/or modify
      5          1.1  christos # it under the terms of the GNU General Public License as published by
      6          1.1  christos # the Free Software Foundation; either version 3 of the License, or
      7          1.1  christos # (at your option) any later version.
      8          1.1  christos #
      9          1.1  christos # This program is distributed in the hope that it will be useful,
     10          1.1  christos # but WITHOUT ANY WARRANTY; without even the implied warranty of
     11          1.1  christos # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     12          1.1  christos # GNU General Public License for more details.
     13          1.1  christos #
     14          1.1  christos # You should have received a copy of the GNU General Public License
     15          1.1  christos # along with this program.  If not, see <http://www.gnu.org/licenses/>.
     16          1.1  christos 
     17          1.1  christos # This script intends to facilitate spell checking of source/doc files.
     18          1.1  christos # It:
     19          1.1  christos # - transforms the files into a list of lowercase words
     20          1.1  christos # - prefixes each word with the frequency
     21          1.1  christos # - filters out words within a frequency range
     22          1.1  christos # - sorts the words, longest first
     23          1.1  christos #
     24          1.1  christos # If '-c' is passed as option, it operates on the C comments only, rather than
     25          1.1  christos # on the entire file.
     26          1.1  christos #
     27          1.1  christos # For:
     28          1.1  christos # ...
     29          1.1  christos # $ files=$(find gdb -type f \( -name "*.c" -o -name "*.h" \))
     30          1.1  christos # $ ./gdb/contrib/words.sh -c $files
     31          1.1  christos # ...
     32          1.1  christos # it generates a list of ~15000 words prefixed with frequency.
     33          1.1  christos #
     34          1.1  christos # This could be used to generate a dictionary that is kept as part of the
     35          1.1  christos # sources, against which new code can be checked, generating a warning or
     36          1.1  christos # error.  The hope is that misspellings would trigger this frequently, and rare
     37          1.1  christos # words rarely, otherwise the burden of updating the dictionary would be too
     38          1.1  christos # much.
     39          1.1  christos #
     40          1.1  christos # And for:
     41          1.1  christos # ...
     42          1.1  christos # $ files=$(find gdb -type f \( -name "*.c" -o -name "*.h" \))
     43          1.1  christos # $ ./gdb/contrib/words.sh -c -f 1 $files
     44          1.1  christos # ...
     45          1.1  christos # it generates a list of ~5000 words with frequency 1.
     46          1.1  christos #
     47          1.1  christos # This can be used to scan for misspellings manually.
     48          1.1  christos #
     49          1.1  christos 
# Option state, filled in by parse_options:
#   minfreq, maxfreq - inclusive frequency bounds; 0 means "no bound".
#   c                - "true" when only C comments are to be scanned.
#   nopts            - number of leading arguments consumed as options.
minfreq=
maxfreq=
c=false
nopts=0

# Report a missing or malformed argument for option $1 on stderr and exit.
bad_option () {
    echo "$0: option '$1' requires a non-negative integer argument" >&2
    exit 1
}

# Parse the leading options of the argument list it is given:
#   -c              restrict processing to C comments
#   -f N, --freq N  keep only words with frequency exactly N
#   --min N         keep only words with frequency >= N
#   --max N         keep only words with frequency <= N
# Parsing stops at the first argument that is not one of these options.
# Results are stored in the globals described above; the caller is
# expected to drop the consumed options with 'shift "$nopts"'.
parse_options () {
    minfreq=
    maxfreq=
    c=false
    nopts=0

    while [ $# -gt 0 ]; do
        case "$1" in
            -c)
                c=true
                shift
                nopts=$((nopts + 1))
                ;;
            --freq|-f|--min|--max)
                # Without this check a trailing option makes 'shift 2'
                # fail; some shells then leave the arguments untouched
                # and this loop never terminates.
                [ $# -ge 2 ] || bad_option "$1"
                # The value ends up in a numeric comparison, so insist
                # on plain decimal digits.
                case "$2" in
                    ''|*[!0-9]*)
                        bad_option "$1"
                        ;;
                esac
                case "$1" in
                    --min)
                        minfreq=$2
                        if [ "$maxfreq" = "" ]; then
                            maxfreq=0
                        fi
                        ;;
                    --max)
                        maxfreq=$2
                        if [ "$minfreq" = "" ]; then
                            minfreq=0
                        fi
                        ;;
                    *)
                        # -f / --freq: exact frequency.
                        minfreq=$2
                        maxfreq=$2
                        ;;
                esac
                shift 2
                nopts=$((nopts + 2))
                ;;
            *)
                break
                ;;
        esac
    done

    # No frequency option at all: do not filter.
    if [ "$minfreq" = "" ] && [ "$maxfreq" = "" ]; then
        minfreq=0
        maxfreq=0
    fi
}

parse_options "$@"
shift "$nopts"
     88          1.1  christos 
# Create the awk program used for '-c' (C comments only) in a temporary
# file that is removed again when the script exits.
awkfile=$(mktemp) || {
    echo "$0: cannot create temporary file" >&2
    exit 1
}
trap 'rm -f "$awkfile"' EXIT

# The here-document delimiter is quoted so that the shell copies the awk
# program verbatim; no '$' needs escaping.
cat > "$awkfile" <<'EOF'
BEGIN {
    in_comment = 0
}

# Every record starts out with a working copy that later rules trim.
{
    line = $0
}

# Comment opener on this line: remember we are inside a comment and drop
# everything up to and including the last "/*".
/\/\*/ {
    in_comment = 1
    sub(/.*\/\*/, "", line)
}

# Comment closer on this line: drop the first "*/" and what follows it,
# print what is left and go on to the next record.
/\*\// {
    sub(/\*\/.*/, "", line)
    in_comment = 0
    print line
    next
}

# Any other line is printed only while inside a comment.
{
    if (in_comment) {
        print line
    }
}
EOF
    119          1.1  christos 
# Stabilize sort.
export LC_ALL=C

# Write the text to be scanned to stdout: only the C comments of the given
# files when $c is "true" (extracted by the awk program in $awkfile),
# otherwise the files' entire contents.
emit_text () {
    if $c; then
        awk \
            -f "$awkfile" \
            -- "$@"
    else
        cat "$@"
    fi
}

# Read text on stdin and write one "<count> <word>" line per distinct
# lowercase word, longest line first.  Only words whose count lies within
# [$minfreq, $maxfreq] are kept; a bound of 0 disables that side of the
# check.
words_by_frequency () {
    # sed: turn punctuation, brackets, quotes and digit runs into line
    # breaks so that every word ends up on a line of its own.
    # The bounds are handed to awk with -v rather than pasted into the
    # program text, so an odd value cannot alter or break the program;
    # "+ 0" forces a numeric comparison.
    sed \
        -e 's/[!"?;:%^$~#{}`&=@,. \t\/_()|<>\+\*-]/\n/g' \
        -e 's/\[/\n/g' \
        -e 's/\]/\n/g' \
        -e "s/'/\n/g" \
        -e 's/[0-9][0-9]*/\n/g' \
        -e 's/[ \t]*//g' \
    | tr '[:upper:]' '[:lower:]' \
    | sort \
    | uniq -c \
    | awk -v min="$minfreq" -v max="$maxfreq" \
        '(min + 0 == 0 || min + 0 <= $1 + 0) \
         && (max + 0 == 0 || $1 + 0 <= max + 0)' \
    | awk '{ print length($0) " " $0; }' \
    | sort -n -r \
    | cut -d ' ' -f 2-
}

emit_text "$@" | words_by_frequency
    145