#!/bin/bash
# Copyright 2020-2024 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License").
# You may not use this file except in compliance with the License.
# You can obtain a copy in the file LICENSE in the source distribution
# or at https://www.openssl.org/source/license.html
#
# This script is a wrapper around check-format.pl.
# It accepts a single commit revision range in the form understood by
# 'git diff', or just a single commit id, and uses it to identify the files
# and line ranges that were changed in that commit range, filtering
# check-format.pl output only to lines that fall into the change ranges of
# the changed files.
# examples:
# check-format-commit.sh       # check unstaged changes
# check-format-commit.sh HEAD
# check-format-commit.sh @~3..
# check-format-commit.sh f5981c9629667a5a5d6
# check-format-commit.sh f5981c9629667a5a5d6..ee0bf38e8709bf71888
#
# Exit status:
#   0 - nothing to check, or no findings inside the changed line ranges
#   1 - bad arguments, invalid commit range, or set-up failure
#   2 - check-format.pl reported findings inside the changed line ranges
#
# Limitation: file paths containing whitespace are not supported, because
# the intermediate files are whitespace-delimited.

# Allowlist of files to scan
# Currently this is any .c or .h file (with an optional .in suffix).
# Entries are POSIX basic regular expressions matched against the end of the
# file name; \{0,1\} is the portable spelling of "optional".
FILE_NAME_END_ALLOWLIST=('\.[ch]\(\.in\)\{0,1\}')

# Global vars

# TOPDIR always points to the root of the git tree we are working in
# used to locate the check-format.pl script
TOPDIR=""

# TEMPDIR is used to hold any files this script creates
# And is cleaned on EXIT with a trap function
TEMPDIR=$(mktemp -d /tmp/checkformat.XXXXXX) || {
    echo "Unable to create a temporary directory" >&2
    exit 1
}

# cleanup handler function, returns us to the root of the git tree
# and erases our temp directory
cleanup() {
    # ${TEMPDIR:?} guards against ever expanding to a bare 'rm -rf --'
    rm -rf -- "${TEMPDIR:?}"
    if [ -n "$TOPDIR" ]; then
        cd "$TOPDIR" || true
    fi
}

trap cleanup EXIT

TOPDIR=$(git rev-parse --show-toplevel) || {
    echo "Unable to locate the top of the git tree" >&2
    exit 1
}

# Only one revision argument is supported: the logic below relies on
# 'git rev-parse' listing the newest commit of the range first, which does
# not hold for a list of independent revisions.
if [ "$#" -gt 1 ]; then
    echo "usage: ${0##*/} [<commit id> | <commit range>]" >&2
    exit 1
fi

# Get the list of ids of the commits we are checking,
# or empty for unstaged changes.
# This lets us pass in symbolic ref names like master/etc and
# resolve them to commit ids easily
COMMIT_RANGE="${1:-}"
COMMIT_LAST=""
if [ -n "$COMMIT_RANGE" ]; then
    # Fail gracefully if git rev-parse doesn't produce a valid commit
    if ! COMMIT_LAST=$(git rev-parse "$COMMIT_RANGE"); then
        echo "$1 is not a valid commit range or commit id" >&2
        exit 1
    fi
fi

# If the commit range is exactly one revision,
# git rev-parse will output just the commit id of that one alone.
# In that case, we must manipulate a little to get a desirable result,
# as 'git diff' has a slightly different interpretation of a single commit id:
# it takes that to mean all commits up to HEAD, plus any unstaged changes.
rev_count=$(printf '%s' "$COMMIT_LAST" | wc -w)
if [ "$rev_count" -eq 1 ]; then
    # $COMMIT_RANGE is just one commit, make it an actual range
    COMMIT_RANGE="$COMMIT_RANGE^..$COMMIT_RANGE"
else
    # For a range the first id printed is the end of the range
    # (and for unstaged changes this simply stays empty).
    COMMIT_LAST=$(printf '%s\n' "$COMMIT_LAST" | head -n 1)
fi

# Create an iterable list of files to check formatting on,
# including the line ranges that are changed by the commits
# It produces output of this format:
# <file name> <change start line>,<change line count>
#
# The prefixes are pinned so that user configuration (diff.noprefix,
# diff.mnemonicPrefix) cannot change the "+++ b/<path>" header we parse.
# "+++ " is only honoured inside a per-file header (between a "diff " line
# and the first hunk): with -U0 an added source line that itself starts
# with "++" is also rendered as "+++...", and must not be taken for a header.
# ${COMMIT_RANGE:+...} passes no argument at all when checking unstaged changes.
git diff -U0 --no-color --src-prefix=a/ --dst-prefix=b/ \
    ${COMMIT_RANGE:+"$COMMIT_RANGE"} | awk '
    /^diff /                { in_header = 1; next }
    in_header && /^\+\+\+ / { sub(/^b\//, "", $2); file = $2; next }
    /^@@ /                  { in_header = 0; sub(/^\+/, "", $3); print file " " $3 }
    ' > "$TEMPDIR/ranges.txt"

# filter in anything that matches on a filter regex
: > "$TEMPDIR/ranges.filter"
for pattern in "${FILE_NAME_END_ALLOWLIST[@]}"
do
    # Note the space after the $pattern below. This is done because we want
    # to match on file name suffixes, but the input file is of the form
    # <file path> <range start>,<range length>
    # So we can't just match on end of line. The additional space
    # here lets us match on suffixes followed by the expected space
    # in the input file.
    # grep exits non-zero when nothing matches; that is not an error here.
    grep -- "$pattern " "$TEMPDIR/ranges.txt" >> "$TEMPDIR/ranges.filter" || true
done

REMAINING_FILES=$(wc -l < "$TEMPDIR/ranges.filter")
if [ "$REMAINING_FILES" -eq 0 ]
then
    echo "The given commit range has no C source file changes that require checking"
    exit 0
fi

# The distinct file names to check, one array element per file
files=()
while IFS= read -r name
do
    files+=("$name")
done < <(awk '{ print $1 }' "$TEMPDIR/ranges.filter" | sort -u)

# unless checking the format of unstaged changes,
# check out the files from the commit range.
if [ -n "$COMMIT_RANGE" ]
then
    # For each file name in ranges, we show that file at the commit range
    # we are checking, and redirect it to the same path,
    # relative to $TEMPDIR/check-format.
    # This gives us the full file path to run check-format.pl on
    # with line numbers matching the ranges in the $TEMPDIR/ranges.filter file
    for j in "${files[@]}"
    do
        mkdir -p "$TEMPDIR/check-format/$(dirname "$j")"
        git show "$COMMIT_LAST:$j" > "$TEMPDIR/check-format/$j"
    done

    # Go to our checked out tree
    cd "$TEMPDIR/check-format" || exit 1
else
    # Unstaged changes are checked in place. The paths reported by git diff
    # are relative to the top of the tree, so run from there.
    cd "$TOPDIR" || exit 1
fi

# Now for each file in $TEMPDIR/ranges.filter, run check-format.pl
: > "$TEMPDIR/results-filtered.txt"
for j in "${files[@]}"
do
    range_start=()
    range_end=()

    # Get the ranges for this file. Create 2 arrays. range_start contains
    # the start lines for valid ranges from the commit. the range_end array
    # contains the corresponding end line. Note, since diff output gives us
    # a line count for a change, the range_end[k] entry is actually
    # range_start[k]+line count
    # The file name is compared as an exact string (not as a regex) so that
    # e.g. foo.c does not also collect the ranges of foo.c.in.
    while IFS= read -r hunk
    do
        RSTART=${hunk%%,*}
        case "$hunk" in
            *,*) RLEN=${hunk#*,} ;;
            # when the hunk is just one line, its length is implied
            *)   RLEN=1 ;;
        esac
        range_start+=("$RSTART")
        range_end+=("$((RSTART + RLEN))")
    done < <(awk -v f="$j" '($1 "") == (f "") { print $2 }' "$TEMPDIR/ranges.filter")

    # Actually run check-format.pl on the file, capturing the output
    # in a temporary file. Note the format of check-format.pl output is
    # <file path>:<line number>:<error text>:<offending line contents>
    "$TOPDIR/util/check-format.pl" "$j" > "$TEMPDIR/results.txt"

    # Now we filter the check-format.pl output based on the changed lines
    # captured in the range_start/end arrays
    for k in "${!range_start[@]}"
    do
        # field 2 of check-format.pl output is the offending line number
        # Check here if any line in that output falls between any of the
        # start/end ranges defined in the range_start/range_end array.
        # If it does fall in that range, print the entire line to stdout
        awk -v rstart="${range_start[$k]}" -v rend="${range_end[$k]}" -F':' '
            /:/ { if (rstart <= $2 && $2 <= rend) print $0 }
            ' "$TEMPDIR/results.txt" >> "$TEMPDIR/results-filtered.txt"
    done
done
cat "$TEMPDIR/results-filtered.txt"

# If any findings were in range, exit with a different error code
if [ -s "$TEMPDIR/results-filtered.txt" ]
then
    exit 2
fi