xref: /freebsd/contrib/bmake/mk/meta2deps.sh (revision 3dd5524264095ed8612c28908e13f80668eff2f9)
1#!/bin/sh
2
3# NAME:
4#	meta2deps.sh - extract useful info from .meta files
5#
6# SYNOPSIS:
7#	meta2deps.sh SB="SB" "meta" ...
8#
9# DESCRIPTION:
#	This script looks at each "meta" file and extracts the
11#	information needed to deduce build and src dependencies.
12#
13#	To do this, we extract the 'CWD' record as well as all the
14#	syscall traces which describe 'R'ead, 'C'hdir and 'E'xec
15#	syscalls.
16#
17#	The typical meta file looks like::
18#.nf
19#
20#	# Meta data file "path"
21#	CMD "command-line"
22#	CWD "cwd"
23#	TARGET "target"
24#	-- command output --
25#	-- filemon acquired metadata --
26#	# buildmon version 2
27#	V 2
28#	E "pid" "path"
29#	R "pid" "path"
30#	C "pid" "cwd"
31#	R "pid" "path"
32#	X "pid" "status"
33#.fi
34#
35#	The fact that all the syscall entry lines start with a single
#	character makes these files quite easy to process using sed(1).
37#
38#	To simplify the logic the 'CWD' line is made to look like a
39#	normal 'C'hdir entry, and "cwd" is remembered so that it can
40#	be prefixed to any "path" which is not absolute.
41#
42#	If the "path" being read ends in '.srcrel' it is the content
43#	of (actually the first line of) that file that we are
44#	interested in.
45#
46#	Any "path" which lies outside of the sandbox "SB" is generally
47#	not of interest and is ignored.
48#
#	The output is a set of absolute paths with "SB" like:
50#.nf
51#
52#	$SB/obj-i386/bsd/include
53#	$SB/obj-i386/bsd/lib/csu/i386
54#	$SB/obj-i386/bsd/lib/libc
55#	$SB/src/bsd/include
56#	$SB/src/bsd/sys/i386/include
57#	$SB/src/bsd/sys/sys
58#	$SB/src/pan-release/rtsock
59#	$SB/src/pfe-shared/include/jnx
60#.fi
61#
62#	Which can then be further processed by 'gendirdeps.mk'
63#
64#	If we are passed 'DPDEPS='"dpdeps", then for each src file
65#	outside of "CURDIR" we read, we output a line like:
66#.nf
67#
68#	DPDEPS_$path += $RELDIR
69#.fi
70#
#	with "$path" getting turned into reldir's, so that we can end
72#	up with a list of all the directories which depend on each src
73#	file in another directory.  This can allow for efficient yet
74#	complete testing of changes.
75
76
77# RCSid:
78#	$Id: meta2deps.sh,v 1.20 2023/01/18 01:35:24 sjg Exp $
79
80# Copyright (c) 2010-2013, Juniper Networks, Inc.
81# All rights reserved.
82#
83# Redistribution and use in source and binary forms, with or without
84# modification, are permitted provided that the following conditions
85# are met:
86# 1. Redistributions of source code must retain the above copyright
87#    notice, this list of conditions and the following disclaimer.
88# 2. Redistributions in binary form must reproduce the above copyright
89#    notice, this list of conditions and the following disclaimer in the
90#    documentation and/or other materials provided with the distribution.
91#
92# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
93# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
94# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
95# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
96# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
97# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
98# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
99# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
100# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
101# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
102# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
103
# meta2src meta ...
#	Print the sorted, unique paths of every file ending in
#	.c, .h, .y or .l that appears in an 'R' record of the
#	given meta files.  The leading "R <pid> " is stripped.
#	/dev/null is always passed to cat so that we never end up
#	reading stdin when no files are given.
meta2src() {
    cat /dev/null "$@" |
    sed -n \
	-e '/^R .*\.[chyl]$/{' \
	-e 's,^..[0-9]* ,,p' \
	-e '}' |
    sort -u
}
109
# meta2dirs meta ...
#	Print the sorted, unique directories of the files named in
#	'R' records of the given meta files.  Only paths that contain
#	a '/' and whose last component has a '.' followed by a
#	lower-case letter or digit are considered.
#	/dev/null is always passed to cat so that we never end up
#	reading stdin when no files are given.
meta2dirs() {
    cat /dev/null "$@" |
    sed -n \
	-e '/^R .*\/.*\.[a-z0-9][^\/]*$/{' \
	-e 's,^..[0-9]* \(.*\)/[^/]*$,\1,p' \
	-e '}' |
    sort -u
}
115
# add_list ["|"] [-s suffix] name [value ...]
#	Append each "value" (with "suffix" appended, if given) to the
#	list held in the variable called "name", skipping any entry
#	that is already present.  Entries are separated by a space,
#	or by '|' when "|" is given as a leading argument.
#	Note: sep, suffix, name, list and top are plain globals
#	(this is /bin/sh, there is no 'local').
add_list() {
    sep=' '
    suffix=
    while :
    do
	case "$1" in
	"|") sep="$1"; shift;;
	-s) suffix="$2"; shift 2;;
	*) break;;
	esac
    done
    name=$1
    shift
    # fetch the current value of the variable named by $name
    eval list="\$$name"
    for top in "$@"
    do
	# wrap both sides in $sep so we only match whole entries
	case "$sep$list$sep" in
	*"$sep$top$suffix$sep"*) continue;;
	esac
	list="${list:+$list$sep}$top$suffix"
    done
    # Assign by reference: the value is expanded by eval itself and
    # is never re-parsed, so entries containing '$', '"' or '`'
    # are stored verbatim.
    eval "$name=\$list"
}
139
# Some Linux systems have deprecated egrep in favor of grep -E,
# but not everyone supports that.
# Probe egrep: unless it prints exactly the matching input line
# (stderr is captured too, so a deprecation warning counts as a
# failure) provide our own egrep as a wrapper around grep -E.
if [ "$(echo bmake | egrep 'a|b' 2>&1)" != bmake ]; then
    egrep() { grep -E "$@"; }
fi
146
# _excludes_f
#	Filter stdin, dropping every line that matches the extended
#	regular expression in $EXCLUDES.
#	The pattern is passed via -e so that a value which starts
#	with '-' is not mistaken for a grep option.
_excludes_f() {
    egrep -v -e "$EXCLUDES"
}
150
# error message ...
#	Report "ERROR: message" on stderr and terminate the current
#	shell (or subshell) with exit status 1.
error() {
    >&2 echo "ERROR: $@"
    exit 1
}
155
# meta2deps [VAR=value ...] [options] meta|@list ...
#	Process the given meta files and print, sorted and unique,
#	the directories (and .dirdep derived entries) they refer to.
#	An argument of the form @file names a file that lists
#	further files to read.
#
#	VAR=value	evaluated and exported
#	-a arch		sets MACHINE_ARCH
#	-m machine	sets MACHINE
#	-C curdir	sets CURDIR
#	-H host_target	sets HOST_TARGET
#	-S srctop	added to SRCTOPS (initially $SRCTOP)
#	-O objroot	added to OBJROOTS
#	-X regex	added to EXCLUDES ('|' separated), input lines
#			matching it are dropped
#	-R reldir	sets RELDIR
#	-T target_spec	sets TARGET_SPEC
#
#	If both DPDEPS and RELDIR are set, "DPDEPS_<src> += $RELDIR"
#	lines are appended to the file $DPDEPS.
#	Calls error (exit 1) on: no filemon data, truncated filemon
#	data, or an 'E'xec'd pid for which no e'X'it was seen.
meta2deps() {
    DPDEPS=
    SRCTOPS=$SRCTOP
    OBJROOTS=
    EXCLUDES=
    while :
    do
	case "$1" in
	*=*) eval export "$1"; shift;;
	-a) MACHINE_ARCH=$2; shift 2;;
	-m) MACHINE=$2; shift 2;;
	-C) CURDIR=$2; shift 2;;
	-H) HOST_TARGET=$2; shift 2;;
	-S) add_list SRCTOPS $2; shift 2;;
	-O) add_list OBJROOTS $2; shift 2;;
	# the '|' separator must precede the list name, otherwise
	# add_list treats it as just another value and EXCLUDES
	# ends up as "| pattern" which matches every line
	-X) add_list '|' EXCLUDES $2; shift 2;;
	-R) RELDIR=$2; shift 2;;
	-T) TARGET_SPEC=$2; shift 2;;
	*) break;;
	esac
    done

    # make sure we do not pick up a stale _ht
    _ht= _o=
    case "$MACHINE" in
    host) _ht=$HOST_TARGET;;
    esac

    # strip any trailing $HOST_TARGET/$TARGET_SPEC/$MACHINE
    # component from each objroot
    for o in $OBJROOTS
    do
	case "$MACHINE,/$o/" in
	host,*$HOST_TARGET*) ;;
	*$MACHINE*|*${TARGET_SPEC:-$MACHINE}*) ;;
	*) add_list _o $o; continue;;
	esac
	for x in $_ht $TARGET_SPEC $MACHINE
	do
	    case "$o" in
	    "") continue;;
	    */$x/) add_list _o ${o%$x/}; o=;;
	    */$x) add_list _o ${o%$x}; o=;;
	    *$x/) add_list _o ${o%$x/}; o=;;
	    *$x) add_list _o ${o%$x}; o=;;
	    esac
	done
    done
    OBJROOTS="$_o"

    # default OBJTOP from the first objroot
    case "$OBJTOP" in
    "")
	for o in $OBJROOTS
	do
	    OBJTOP=$o${TARGET_SPEC:-$MACHINE}
	    break
	done
	;;
    esac
    # these are used as case patterns below
    src_re=
    obj_re=
    add_list '|' -s '/*' src_re $SRCTOPS
    add_list '|' -s '*' obj_re $OBJROOTS

    # DPDEPS output makes no sense without RELDIR
    [ -z "$RELDIR" ] && unset DPDEPS
    # NOTE(review): predictable name in /tmp - consider mktemp(1)
    tf=/tmp/m2d$$-$USER
    rm -f $tf.*
    trap 'rm -f $tf.*; trap 0' 0

    > $tf.dirdep
    > $tf.qual
    > $tf.srcdep
    > $tf.srcrel
    > $tf.dpdeps

    seenit=
    seensrc=
    lpid=
    case "$EXCLUDES" in
    "") _excludes=cat;;
    *) _excludes=_excludes_f;;
    esac
    # handle @list files
    case "$@" in
    *@[!.]*)
	for f in "$@"
	do
	    case "$f" in
	    *.meta) cat $f;;
	    @*) xargs cat < ${f#@};;
	    *) cat $f;;
	    esac
	done
	;;
    *) cat /dev/null "$@";;
    esac 2> /dev/null |
    sed -e 's,^CWD,C C,;/^[#CREFLMVX] /!d' -e "s,',,g" |
    $_excludes | ( version=no epids= xpids= eof_token=no
    while read op pid path junk
    do
	: op=$op pid=$pid path=$path
	# we track cwd and ldir (of interest) per pid
	# CWD is bmake's cwd
	case "$lpid,$pid" in
	,C) CWD=$path cwd=$path ldir=$path
	    if [ -z "$SB" ]; then
		SB=`echo $CWD | sed 's,/obj.*,,'`
	    fi
	    SRCTOP=${SRCTOP:-$SB/src}
	    # a new CWD record means a new meta file;
	    # check that the previous one was complete
	    case "$version" in
	    no) ;;		# ignore
	    0) error "no filemon data";;
	    *) ;;
	    esac
	    version=0
	    case "$eof_token" in
	    no) ;;		# ignore
	    0) error "truncated filemon data";;
	    esac
	    eof_token=0
	    continue
	    ;;
	$pid,$pid) ;;
	# NOTE(review): this is matched against "$lpid,$pid" which
	# starts with ',' while lpid is empty, and lpid is only set
	# below - confirm the pattern is what was intended.
	[1-9]*)
	    case "$lpid" in
	    "") ;;
	    *) eval ldir_$lpid=$ldir;;
	    esac
	    eval ldir=\${ldir_$pid:-$CWD} cwd=\${cwd_$pid:-$CWD}
	    lpid=$pid
	    ;;
	esac

	: op=$op path=$path
	case "$op,$path" in
	V,*) version=$pid; continue;;
	W,*srcrel|*.dirdep) continue;;
	C,*)
	    case "$path" in
	    /*) cwd=$path;;
	    *) cwd=`cd $cwd/$path 2> /dev/null && /bin/pwd`;;
	    esac
	    # watch out for temp dirs that no longer exist
	    test -d ${cwd:-/dev/null/no/such} || cwd=$CWD
	    eval cwd_$pid=$cwd
	    continue
	    ;;
	F,*) # $path is new pid
	    eval cwd_$path=$cwd ldir_$path=$ldir
	    continue
	    ;;
	\#,bye) eof_token=1; continue;;
	\#*) continue;;
	*)  dir=${path%/*}
	    case "$op" in
	    E)	# setid apps get no tracing so we won't see eXit
		case `'ls' -l $path 2> /dev/null | sed 's, .*,,'` in
		*s*) ;;
		*) epids="$epids $pid";;
		esac
		;;
	    X) xpids="$xpids $pid"; continue;;
	    esac
	    case "$path" in
	    $src_re|$obj_re) ;;
	    /*/stage/*) ;;
	    /*) continue;;
	    *)	for path in $ldir/$path $cwd/$path
		do
			test -e $path && break
		done
		dir=${path%/*}
		;;
	    esac
	    ;;
	esac
	# avoid repeating ourselves...
	case "$DPDEPS,$seensrc," in
	,*)
	    case ",$seenit," in
	    *,$dir,*) continue;;
	    esac
	    ;;
	*,$path,*) continue;;
	esac
	# canonicalize if needed
	case "/$dir/" in
	*/../*|*/./*)
	    rdir=$dir
	    dir=`cd $dir 2> /dev/null && /bin/pwd`
	    seen="$rdir,$dir"
	    ;;
	*)  seen=$dir;;
	esac
	case "$dir" in
	${CURDIR:-.}|"") continue;;
	$src_re)
	    # avoid repeating ourselves...
	    case "$DPDEPS,$seensrc," in
	    ,*)
		case ",$seenit," in
		*,$dir,*) continue;;
		esac
		;;
	    esac
	    ;;
	*)
	    case ",$seenit," in
	    *,$dir,*) continue;;
	    esac
	    ;;
	esac
	# a directory just becomes the new ldir
	if [ -d $path ]; then
	    case "$path" in
	    */..) ldir=${dir%/*};;
	    *) ldir=$path;;
	    esac
	    continue
	fi
	[ -f $path ] || continue
	case "$dir" in
	$CWD) continue;;		# ignore
	$src_re)
	    seenit="$seenit,$seen"
	    echo $dir >> $tf.srcdep
	    case "$DPDEPS,$reldir,$seensrc," in
	    ,*) ;;
	    *)	seensrc="$seensrc,$path"
		echo "DPDEPS_$dir/${path##*/} += $RELDIR" >> $tf.dpdeps
		;;
	    esac
	    continue
	    ;;
	esac
	# if there is a .dirdep we cannot skip
	# just because we've seen the dir before.
	if [ -s $path.dirdep ]; then
	    # this file contains:
	    # '# ${RELDIR}.<machine>'
	    echo $path.dirdep >> $tf.qual
	    continue
	elif [ -s $dir.dirdep ]; then
	    echo $dir.dirdep >> $tf.qual
	    seenit="$seenit,$seen"
	    continue
	fi
	seenit="$seenit,$seen"
	case "$dir" in
	$obj_re)
	    echo $dir;;
	esac
    done > $tf.dirdep
    # sanity checks for the last meta file read
    : version=$version
    case "$version" in
    0) error "no filemon data";;
    esac
    : eof_token=$eof_token
    case "$eof_token" in
    0) error "truncated filemon data";;
    esac
    # every pid we saw 'E'xec must also have been seen to e'X'it
    for p in $epids
    do
	: p=$p
	case " $xpids " in
	*" $p "*) ;;
	*) error "missing eXit for pid $p";;
	esac
    done ) || exit 1
    _nl=echo
    for f in $tf.dirdep $tf.qual $tf.srcdep
    do
	[ -s $f ] || continue
	case $f in
	*qual) # a list of .dirdep files
	    # we can prefix everything with $OBJTOP to
	    # tell gendirdeps.mk that these are
	    # DIRDEP entries, since they are already
	    # qualified with .<machine> as needed.
	    # We strip .$MACHINE though
	    xargs cat < $f | sort -u |
	    sed "s,^# ,,;s,^,$OBJTOP/,;s,\.${TARGET_SPEC:-$MACHINE}\$,,;s,\.$MACHINE\$,,"
	    ;;
	*)  sort -u $f;;
	esac
	_nl=:
    done
    if [ -s $tf.dpdeps ]; then
	case "$DPDEPS" in
	*/*) ;;
	*) echo > $DPDEPS;;		# the echo is needed!
	esac
	sort -u $tf.dpdeps |
	sed "s,${SRCTOP}/,,;s,${SB_BACKING_SB:-$SB}/src/,," >> $DPDEPS
    fi
    # ensure we produce _something_ else egrep -v gets upset
    $_nl
}
450
# Run the function that matches the name we were invoked as;
# do nothing for any other name (e.g. when the file is sourced).
case "/$0" in
*/meta2dep*)
    meta2deps "$@"
    ;;
*/meta2dirs*)
    meta2dirs "$@"
    ;;
*/meta2src*)
    meta2src "$@"
    ;;
esac
456