Home | History | Annotate | Line # | Download | only in util
      1 #!/bin/bash
      2 # Copyright 2020-2024 The OpenSSL Project Authors. All Rights Reserved.
      3 #
      4 # Licensed under the Apache License 2.0 (the "License").
      5 # You may not use this file except in compliance with the License.
      6 # You can obtain a copy in the file LICENSE in the source distribution
      7 # or at https://www.openssl.org/source/license.html
      8 #
      9 # This script is a wrapper around check-format.pl.
     10 # It accepts the same commit revision range as 'git diff' as arguments,
     11 # or just a single commit id, and uses it to identify the files and line ranges
     12 # that were changed in that commit range, filtering check-format.pl output
     13 # only to lines that fall into the change ranges of the changed files.
     14 # examples:
     15 # check-format-commit.sh       # check unstaged changes
     16 # check-format-commit.sh HEAD
     17 # check-format-commit.sh @~3..
     18 # check-format-commit.sh f5981c9629667a5a5d6
     19 # check-format-commit.sh f5981c9629667a5a5d6..ee0bf38e8709bf71888
     20 
# Allowlist of files to scan
# Currently this is any .c or .h file (with an optional .in suffix).
# Each entry is a basic regular expression that is matched against the end
# of a file name.  Every dot is escaped so that it only matches a literal
# '.', otherwise a name such as "foo.cxin" would be accepted as well.
FILE_NAME_END_ALLOWLIST=("\.[ch]\(\.in\)\?")
     24 
# Global vars

# TOPDIR always points to the root of the git tree we are working in
# used to locate the check-format.pl script.
# It is resolved before the temp directory is created, so that bailing out
# here leaves nothing behind that would need cleaning up.
if ! TOPDIR=$(git rev-parse --show-toplevel)
then
    echo "unable to locate the root of the git tree" >&2
    exit 1
fi

# TEMPDIR is used to hold any files this script creates
# And is cleaned on EXIT with a trap function
if ! TEMPDIR=$(mktemp -d /tmp/checkformat.XXXXXX)
then
    echo "unable to create a temporary directory under /tmp" >&2
    exit 1
fi
     34 
     35 
# cleanup handler function, returns us to the root of the git tree
# and erases our temp directory
# Globals:   TOPDIR (read), TEMPDIR (read)
# Arguments: none
# Both paths are quoted so that whitespace or glob characters in them cannot
# redirect the 'rm -rf' to some other location, and each step is skipped
# when its variable is empty.
cleanup() {
    # Step out of the temp tree before it is deleted
    if [ -n "$TOPDIR" ]; then
        cd "$TOPDIR" || true
    fi
    if [ -n "$TEMPDIR" ]; then
        rm -rf -- "$TEMPDIR"
    fi
}

trap cleanup EXIT
     44 
# Get the list of ids of the commits we are checking,
# or empty for unstaged changes.
# This lets us pass in symbolic ref names like master/etc and
# resolve them to commit ids easily
# At most one argument (a commit id or a commit range) is accepted.
if [ $# -gt 1 ]
then
    echo "usage: $0 [<commit id> | <commit range>]" >&2
    exit 1
fi
COMMIT_RANGE="$*"
COMMIT_LAST=""

# git is only consulted when a revision was actually given; without one
# both variables stay empty, which selects the unstaged changes.
if [ -n "$COMMIT_RANGE" ]
then
    # Fail gracefully if git rev-parse doesn't produce a valid commit
    if ! COMMIT_LAST=$(git rev-parse "$COMMIT_RANGE")
    then
        echo "$1 is not a valid commit range or commit id" >&2
        exit 1
    fi
fi
     58 
# 'git rev-parse' prints a single word when it was handed exactly one
# revision.  'git diff' reads a lone commit id differently, though: it
# compares that commit with the working tree, i.e. every commit up to HEAD
# plus any unstaged changes.  A lone commit is therefore rewritten into the
# explicit range <commit>^..<commit>.
rev_words=$(echo -n "$COMMIT_LAST" | wc -w)
if [ $rev_words -ne 1 ]
then
    # Not exactly one word: keep only the first line of what rev-parse
    # printed as the commit the files are read from.
    COMMIT_LAST=${COMMIT_LAST%%$'\n'*}
else
    # $COMMIT_RANGE is just one commit, make it an actual range
    COMMIT_RANGE="${COMMIT_RANGE}^..${COMMIT_RANGE}"
fi
     70 
# Create an iterable list of files to check formatting on,
# including the line ranges that are changed by the commits
# It produces output of this format:
# <file name> <change start line>,<change line count>
# (git leaves out ",<change line count>" when a hunk is a single line)
#
# $COMMIT_RANGE is left unquoted on purpose: when it is empty it has to
# vanish from the command line so that the unstaged changes are diffed.
git diff -U0 $COMMIT_RANGE | awk '
    # A "diff ..." line opens the header section of the next file.
    /^diff /  { in_header = 1 }
    # "+++ b/<path>" names the post-change file.  It is only honoured inside
    # a header section, because an added source line that itself starts
    # with "++" also shows up as "+++..." in the diff body.
    in_header && /^\+\+\+ / { sub(/^b\//, "", $2); file = $2 }
    # "@@ -<old> +<new start>[,<count>] @@": keep the new-side range.
    # The name is handed to printf as an argument rather than as the format
    # string, so a "%" inside a path is printed literally.
    /^@@/     { in_header = 0; sub(/^\+/, "", $3); printf "%s %s\n", file, $3 }
    ' > "$TEMPDIR/ranges.txt"
     80 
# filter in anything that matches on a filter regex
# The array expansion is quoted so that the regex characters in an entry
# ('[ch]', '?') are never treated as a shell glob.
for i in "${FILE_NAME_END_ALLOWLIST[@]}"
do
    # Note the space after the $i below.  This is done because we want
    # to match on file name suffixes, but the input file is of the form
    # <file path> <range start>,<range length>
    # So we can't just match on end of line.  The additional space
    # here lets us match on suffixes followed by the expected space
    # in the input file
    # grep finding nothing is not an error here, hence the '|| true'
    grep -- "$i " "$TEMPDIR/ranges.txt" >> "$TEMPDIR/ranges.filter" || true
done
     92 
# Count the change ranges that made it through the allowlist filter;
# if none are left there is nothing to hand to check-format.pl.
REMAINING_FILES=$(wc -l <$TEMPDIR/ranges.filter)
if test $REMAINING_FILES -eq 0; then
    echo "The given commit range has no C source file changes that require checking"
    exit 0
fi
     99 
# unless checking the format of unstaged changes,
# check out the files from the commit range.
if [ -n "$COMMIT_RANGE" ]
then
    # For each file name in ranges, we show that file at the commit range
    # we are checking, and redirect it to the same path,
    # relative to $TEMPDIR/check-format.
    # This give us the full file path to run check-format.pl on
    # with line numbers matching the ranges in the $TEMPDIR/ranges.filter file
    #
    # The names are read one per line on descriptor 3 and every expansion is
    # quoted, so a name is never word-split or glob-expanded by the shell.
    while read -r j <&3
    do
        mkdir -p "$TEMPDIR/check-format/$(dirname "$j")" || exit 1
        git show "$COMMIT_LAST:$j" > "$TEMPDIR/check-format/$j"
    done 3< <(awk '{print $1}' "$TEMPDIR/ranges.filter" | sort -u)
fi
    116 
# Now for each file in $TEMPDIR/ranges.filter, run check-format.pl
# The file names are read on descriptor 3 so that nothing run inside the
# loop can consume them from stdin by accident.
while read -r j <&3
do
    range_start=()
    range_end=()

    # Get the ranges for this file. Create 2 arrays.  range_start contains
    # the start lines for valid ranges from the commit.  the range_end array
    # contains the corresponding end line.  Note, since diff output gives us
    # a line count for a change, the range_end[k] entry is actually
    # range_start[k]+line count
    # NOTE(review): with the inclusive comparison further down this covers
    # one line more than the hunk itself - confirm that this is intended.
    #
    # The file name is compared as an exact string against the first field.
    # A regex prefix match would also pick up the ranges of any other file
    # whose name merely starts the same way (foo.c versus foo.c.in).
    while IFS=, read -r RSTART RLEN
    do
        # when the hunk is just one line, its length is implied
        RLEN=${RLEN:-1}
        range_start+=("$RSTART")
        range_end+=("$((RSTART + RLEN))")
    done < <(awk -v name="$j" '$1 == name {print $2}' "$TEMPDIR/ranges.filter")

    # Go to our checked out tree, unless checking unstaged changes
    # Give up if that fails, otherwise the working tree copy of the file
    # would be checked against line ranges that belong to the commit.
    if [ -n "$COMMIT_RANGE" ]
    then
        cd "$TEMPDIR/check-format" || exit 1
    fi

    # Actually run check-format.pl on the file, capturing the output
    # in a temporary file.  Note the format of check-format.pl output is
    # <file path>:<line number>:<error text>:<offending line contents>
    "$TOPDIR/util/check-format.pl" "$j" > "$TEMPDIR/results.txt"

    # Now we filter the check-format.pl output based on the changed lines
    # captured in the range_start/end arrays
    for ((idx = 0; idx < ${#range_start[@]}; idx++))
    do
        # field 2 of check-format.pl output is the offending line number
        # Check here if any line in that output falls between any of the
        # start/end ranges defined in the range_start/range_end array.
        # If it does fall in that range, print the entire line to stdout
        # Field 2 has to be a plain number; anything else would make awk
        # fall back to a string comparison against the range bounds.
        awk -v rstart="${range_start[idx]}" -v rend="${range_end[idx]}" -F':' '
                /:/ && $2 ~ /^[ \t]*[0-9]+[ \t]*$/ {
                    if (rstart <= $2 && $2 <= rend) print $0
                }
            ' "$TEMPDIR/results.txt" >> "$TEMPDIR/results-filtered.txt"
    done
done 3< <(awk '{print $1}' "$TEMPDIR/ranges.filter" | sort -u)
# Print every finding that fell inside one of the changed ranges
findings=$TEMPDIR/results-filtered.txt
cat $findings

# If any findings were in range, exit with a different error code
if test -s $findings; then
    exit 2
fi
    171