xref: /freebsd/crypto/openssl/util/check-format-commit.sh (revision 0d0c8621fd181e507f0fb50ffcca606faf66a8c2)
#!/bin/bash
# Copyright 2020-2024 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License").
# You may not use this file except in compliance with the License.
# You can obtain a copy in the file LICENSE in the source distribution
# or at https://www.openssl.org/source/license.html
#
# This script is a wrapper around check-format.pl.
# It accepts the same commit revision range as 'git diff' as arguments,
# or just a single commit id, and uses it to identify the files and line ranges
# that were changed in that commit range, filtering check-format.pl output
# only to lines that fall into the change ranges of the changed files.
# examples:
# check-format-commit.sh       # check unstaged changes
# check-format-commit.sh HEAD
# check-format-commit.sh @~3..
# check-format-commit.sh f5981c9629667a5a5d6
# check-format-commit.sh f5981c9629667a5a5d6..ee0bf38e8709bf71888
20a7148ab3SEnji Cooper
# Allowlist of files to scan
# Currently this is any .c or .h file (with an optional .in suffix).
# Each entry is a basic regular expression describing the END of a file name.
# The dot in front of "in" is escaped so that only a literal ".in" suffix is
# accepted; an unescaped "." would match any character (e.g. "foo.cXin").
FILE_NAME_END_ALLOWLIST=("\.[ch]\(\.in\)\?")
24a7148ab3SEnji Cooper
# Global vars

# TEMPDIR is used to hold any files this script creates
# and is cleaned on EXIT with a trap function
TEMPDIR=$(mktemp -d /tmp/checkformat.XXXXXX)
if [ -z "$TEMPDIR" ] || [ ! -d "$TEMPDIR" ]
then
    echo "Failed to create a temporary directory" >&2
    exit 1
fi

# TOPDIR always points to the root of the git tree we are working in
# used to locate the check-format.pl script
TOPDIR=$(git rev-parse --show-toplevel)
if [ -z "$TOPDIR" ]
then
    echo "Failed to locate the root of the git tree" >&2
    # The EXIT trap is installed further down, so remove the (still empty)
    # temp directory by hand before bailing out
    rmdir "$TEMPDIR"
    exit 1
fi
34a7148ab3SEnji Cooper
35a7148ab3SEnji Cooper
# cleanup handler function, returns us to the root of the git tree
# and erases our temp directory
# Globals: TOPDIR (read), TEMPDIR (read)
# Both paths are quoted so that whitespace in them cannot split the
# arguments of cd/rm, and an empty value is skipped instead of being
# handed to cd or rm -rf.
cleanup() {
    # Step out of the temp directory (the script may have changed into
    # it) before deleting it
    if [ -n "$TOPDIR" ]
    then
        cd "$TOPDIR"
    fi
    if [ -n "$TEMPDIR" ]
    then
        rm -rf -- "$TEMPDIR"
    fi
}

trap cleanup EXIT
44a7148ab3SEnji Cooper
# Get the list of ids of the commits we are checking,
# or empty for unstaged changes.
# This lets us pass in symbolic ref names like master/etc and
# resolve them to commit ids easily
COMMIT_RANGE="$*"
COMMIT_LAST=""

if [ -n "$COMMIT_RANGE" ]
then
    # Only a single word (one commit id or one range expression) is
    # supported; anything containing whitespace is rejected up front.
    case "$COMMIT_RANGE" in
        *[[:space:]]*)
            echo "$COMMIT_RANGE is not a valid commit range or commit id" >&2
            exit 1
            ;;
    esac

    # Fail gracefully if git rev-parse doesn't produce a valid commit.
    # The status is tested directly on the command rather than through a
    # later $? check, so nothing can slip in between and mask it.
    if ! COMMIT_LAST=$(git rev-parse "$COMMIT_RANGE")
    then
        echo "$COMMIT_RANGE is not a valid commit range or commit id" >&2
        exit 1
    fi
fi
58a7148ab3SEnji Cooper
# If the commit range is exactly one revision,
# git rev-parse will output just the commit id of that one alone.
# In that case, we must manipulate a little to get a desirable result,
# as 'git diff' has a slightly different interpretation of a single commit id:
# it takes that to mean all commits up to HEAD, plus any unstaged changes.
# An empty $COMMIT_LAST (unstaged changes) counts as zero words and therefore
# takes the first branch, which leaves both variables empty.
if [ "$(printf '%s' "$COMMIT_LAST" | wc -w)" -ne 1 ]
then
    # A real range: keep only the first id printed by git rev-parse
    COMMIT_LAST=$(printf '%s\n' "$COMMIT_LAST" | head -n 1)
else
    # $COMMIT_RANGE is just one commit, make it an actual range
    COMMIT_RANGE="$COMMIT_RANGE^..$COMMIT_RANGE"
fi
70*0d0c8621SEnji Cooper
# Create an iterable list of files to check formatting on,
# including the line ranges that are changed by the commits
# It produces output of this format:
# <file name> <change start line>[,<change line count>]
# (git leaves out the count when a hunk covers exactly one line)
#
# ${COMMIT_RANGE:+...} expands to nothing when the range is empty, so that
# 'git diff' then reports the unstaged changes instead of receiving an
# empty argument.
# The awk script emits the file name with print rather than using it as a
# printf format string, so a '%' in a path is reproduced literally.
git diff -U0 ${COMMIT_RANGE:+"$COMMIT_RANGE"} | awk '
    /^\+\+\+/ { sub(/^b./,"",$2); file=$2 }
    /^@@/     { sub(/^\+/,"",$3); range=$3; print file " " range }
    ' > "$TEMPDIR/ranges.txt"
80*0d0c8621SEnji Cooper
# filter in anything that matches on a filter regex
# The array expansion is quoted so that each regex reaches grep unchanged;
# unquoted, the shell would word-split it and could expand its [..] and ?
# characters as a file name pattern.
for i in "${FILE_NAME_END_ALLOWLIST[@]}"
do
    # Note the space after the $i below.  This is done because we want
    # to match on file name suffixes, but the input file is of the form
    # <file path> <range start>[,<range length>]
    # So we can't just match on end of line.  The additional space
    # here lets us match on suffixes followed by the expected space
    # in the input file
    grep -e "$i " "$TEMPDIR/ranges.txt" >> "$TEMPDIR/ranges.filter" || true
done
92a7148ab3SEnji Cooper
# Number of changed hunks left after filtering.  If wc cannot read the file
# the variable ends up empty; that is treated as zero so the test below
# stays well-formed and the script stops here.
REMAINING_FILES=$(wc -l < "$TEMPDIR/ranges.filter")
if [ "${REMAINING_FILES:-0}" -eq 0 ]
then
    echo "The given commit range has no C source file changes that require checking"
    exit 0
fi
99*0d0c8621SEnji Cooper
# unless checking the format of unstaged changes,
# check out the files from the commit range.
if [ -n "$COMMIT_RANGE" ]
then
    # For each file name in ranges, we show that file at the commit range
    # we are checking, and redirect it to the same path,
    # relative to $TEMPDIR/check-format.
    # This gives us the full file path to run check-format.pl on
    # with line numbers matching the ranges in the $TEMPDIR/ranges.filter file
    # The list is deliberately word-split: the ranges file is space
    # separated, so file names cannot contain whitespace here anyway.
    for j in $(awk '{print $1}' "$TEMPDIR/ranges.filter" | sort -u)
    do
        FDIR=$(dirname "$j")
        mkdir -p "$TEMPDIR/check-format/$FDIR"
        git show "$COMMIT_LAST:$j" > "$TEMPDIR/check-format/$j"
    done
fi
116a7148ab3SEnji Cooper
# Print the change ranges recorded for exactly one file.
# Arguments: $1 - file path, as found in column 1 of the ranges file
#            $2 - ranges file, one "<file path> <start>[,<count>]" per line
# Outputs:   the matching "<start>[,<count>]" entries, one per line
# The path is compared as a literal string against the whole first column,
# so "foo.h" picks up neither the hunks of "foo.h.in" nor those of a path
# that merely matches "foo.h" when read as a regular expression.
ranges_for_file() {
    awk -v f="$1" '($1 "") == (f "") { print $2 }' "$2"
}

# Now for each file in $TEMPDIR/ranges.filter, run check-format.pl
for j in $(awk '{print $1}' "$TEMPDIR/ranges.filter" | sort -u)
do
    range_start=()
    range_end=()

    # Get the ranges for this file. Create 2 arrays.  range_start contains
    # the start lines for valid ranges from the commit.  the range_end array
    # contains the corresponding end line.  Note, since diff output gives us
    # a line count for a change, the range_end[k] entry is actually
    # range_start[k]+line count
    for k in $(ranges_for_file "$j" "$TEMPDIR/ranges.filter")
    do
        RSTART=${k%%,*}
        # when the hunk is just one line, its length is implied
        RLEN=1
        case "$k" in
            *,?*) RLEN=${k#*,} ;;
        esac
        REND=$((RSTART + RLEN))
        range_start+=("$RSTART")
        range_end+=("$REND")
    done

    # Go to our checked out tree, unless checking unstaged changes.
    # The file names come from 'git diff' and are relative to the root of
    # the git tree, so unstaged changes are checked from $TOPDIR.
    if [ -n "$COMMIT_RANGE" ]
    then
        cd "$TEMPDIR/check-format" || exit 1
    else
        cd "$TOPDIR" || exit 1
    fi

    # Actually run check-format.pl on the file, capturing the output
    # in a temporary file.  Note the format of check-format.pl output is
    # <file path>:<line number>:<error text>:<offending line contents>
    "$TOPDIR/util/check-format.pl" "$j" > "$TEMPDIR/results.txt"

    # Now we filter the check-format.pl output based on the changed lines
    # captured in the range_start/end arrays.
    # field 2 of check-format.pl output is the offending line number.
    # A line is printed if that number falls between the start and end
    # (both inclusive) of any range.  All ranges are tested in one awk
    # pass and the search stops at the first hit, so a finding that lies
    # in two overlapping ranges is reported only once.
    awk -F':' -v starts="${range_start[*]}" -v ends="${range_end[*]}" '
            BEGIN { n = split(starts, rs, " "); split(ends, re, " ") }
            /:/ {
                for (i = 1; i <= n; i++)
                    if (rs[i] <= $2 && $2 <= re[i]) { print $0; break }
            }
        ' "$TEMPDIR/results.txt" >> "$TEMPDIR/results-filtered.txt"
done
# Print the findings that fell into the changed ranges.
# If any findings were in range, exit with a different error code.
# The file is only read when it exists and is non-empty, so a run that
# produced no findings file does not end with an error from cat.
if [ -s "$TEMPDIR/results-filtered.txt" ]
then
    cat "$TEMPDIR/results-filtered.txt"
    exit 2
fi
171