xref: /freebsd/contrib/bmake/mk/meta2deps.sh (revision df21a004be237a1dccd03c7b47254625eea62fa9)
1#!/bin/sh
2
3# NAME:
4#	meta2deps.sh - extract useful info from .meta files
5#
6# SYNOPSIS:
7#	meta2deps.sh SB="SB" "meta" ...
8#
9# DESCRIPTION:
10#	This script looks at each "meta" file and extracts the
11#	information needed to deduce build and src dependencies.
12#
13#	To do this, we extract the 'CWD' record as well as all the
14#	syscall traces which describe 'R'ead, 'C'hdir and 'E'xec
15#	syscalls.
16#
17#	The typical meta file looks like::
18#.nf
19#
20#	# Meta data file "path"
21#	CMD "command-line"
22#	CWD "cwd"
23#	TARGET "target"
24#	-- command output --
25#	-- filemon acquired metadata --
26#	# buildmon version 2
27#	V 2
28#	E "pid" "path"
29#	R "pid" "path"
30#	C "pid" "cwd"
31#	R "pid" "path"
32#	X "pid" "status"
33#.fi
34#
35#	The fact that all the syscall entry lines start with a single
36#	character makes these files quite easy to process using sed(1).
37#
38#	To simplify the logic the 'CWD' line is made to look like a
39#	normal 'C'hdir entry, and "cwd" is remembered so that it can
40#	be prefixed to any "path" which is not absolute.
41#
42#	If the "path" being read ends in '.srcrel' it is the content
43#	of (actually the first line of) that file that we are
44#	interested in.
45#
46#	Any "path" which lies outside of the sandbox "SB" is generally
47#	not of interest and is ignored.
48#
49#	The output is a set of absolute paths with "SB" like:
50#.nf
51#
52#	$SB/obj-i386/bsd/include
53#	$SB/obj-i386/bsd/lib/csu/i386
54#	$SB/obj-i386/bsd/lib/libc
55#	$SB/src/bsd/include
56#	$SB/src/bsd/sys/i386/include
57#	$SB/src/bsd/sys/sys
58#	$SB/src/pan-release/rtsock
59#	$SB/src/pfe-shared/include/jnx
60#.fi
61#
62#	Which can then be further processed by 'gendirdeps.mk'
63#
64#	If we are passed 'DPDEPS='"dpdeps", then for each src file
65#	outside of "CURDIR" we read, we output a line like:
66#.nf
67#
68#	DPDEPS_$path += $RELDIR
69#.fi
70#
71#	with "$path" getting turned into reldir's, so that we can end
72#	up with a list of all the directories which depend on each src
73#	file in another directory.  This can allow for efficient yet
74#	complete testing of changes.
75
76
77# RCSid:
78#	$Id: meta2deps.sh,v 1.24 2025/07/24 15:55:48 sjg Exp $
79
80# SPDX-License-Identifier: BSD-2-Clause
81#
82# Copyright (c) 2011-2025, Simon J. Gerraty
83# Copyright (c) 2010-2013, Juniper Networks, Inc.
84# All rights reserved.
85#
86# Redistribution and use in source and binary forms, with or without
87# modification, are permitted provided that the following conditions
88# are met:
89# 1. Redistributions of source code must retain the above copyright
90#    notice, this list of conditions and the following disclaimer.
91# 2. Redistributions in binary form must reproduce the above copyright
92#    notice, this list of conditions and the following disclaimer in the
93#    documentation and/or other materials provided with the distribution.
94#
95# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
96# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
97# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
98# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
99# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
100# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
101# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
102# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
103# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
104# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
105# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
106
# meta2src meta ...
#	Print the sorted, unique set of paths taken from 'R'ead records
#	whose path ends in .c, .h, .y or .l; the leading op and pid
#	are stripped.
meta2src() {
    # /dev/null comes first so cat has a file to read even when
    # no meta files are named.
    cat /dev/null "$@" |
	sed -n \
	    -e '/^R .*\.[chyl]$/{' \
	    -e 's,^..[0-9]* ,,p' \
	    -e '}' |
	sort -u
}
112
# meta2dirs meta ...
#	Print the sorted, unique set of directories holding files that
#	were 'R'ead, considering only paths whose final component
#	contains a '.' followed by a lowercase letter or digit.
meta2dirs() {
    # /dev/null comes first so cat has a file to read even when
    # no meta files are named.
    cat /dev/null "$@" |
	sed -n \
	    -e '/^R .*\/.*\.[a-z0-9][^\/]*$/{' \
	    -e 's,^..[0-9]* \(.*\)/[^/]*$,\1,p' \
	    -e '}' |
	sort -u
}
118
# add_list [|] [-s suffix] name word ...
#	Append each "word" (with "suffix" appended, if given) to the
#	list held in the variable called "name", skipping any that are
#	already an element of the list.
#	Elements are separated by a space, or by '|' when "|" is given
#	ahead of "name".
#
#	Note: sep, suffix, name, list and top are plain globals.
add_list() {
    sep=' '
    suffix=
    while :
    do
	case "$1" in
	"|") sep="$1"; shift;;
	-s) suffix="$2"; shift 2;;
	*) break;;
	esac
    done
    name=$1
    shift
    # fetch the current value of the variable called $name
    eval "list=\$$name"
    for top in "$@"
    do
	# wrap both sides in $sep so only whole elements match
	case "$sep$list$sep" in
	*"$sep$top$suffix$sep"*) continue;;
	esac
	list="${list:+$list$sep}$top$suffix"
    done
    # Assign by reference: if $list were expanded before the eval,
    # any quote, '$' or backquote in an element would be re-parsed
    # as shell syntax.
    eval "$name=\$list"
}
142
# some Linux systems have deprecated egrep in favor of grep -E
# but not everyone supports that.
# Probe egrep with its stderr captured too: unless the result is
# exactly "bmake", replace egrep with a wrapper around grep -E.
if [ "$(echo bmake | egrep 'a|b' 2>&1)" != bmake ]; then
    egrep() { grep -E "$@"; }
fi
149
# _excludes_f
#	Filter stdin to stdout, dropping every line that matches the
#	extended regular expression held in $EXCLUDES.
_excludes_f() {
    # -e stops a pattern that begins with '-' being taken as an option
    egrep -v -e "$EXCLUDES"
}
153
# error message ...
#	Write "ERROR: " followed by the space-joined arguments to
#	stderr, then exit the current (sub)shell with status 1.
error() {
    # printf '%s' emits the text verbatim; echo's treatment of
    # backslashes differs between shells.
    printf 'ERROR: %s\n' "$*" >&2
    exit 1
}
158
# meta2deps [VAR=value ...] [-a arch] [-m machine] [-C curdir]
#	[-H host_target] [-S srctop] [-O objroot] [-X exclude-re]
#	[-R reldir] [-T target_spec] meta|@list ...
#
#	Read the named meta files (an argument of the form "@file"
#	names a file that lists meta files) and print to stdout, each
#	group sorted and unique:
#	- directories matching the object roots that files were read from,
#	- the content of any .dirdep files found, prefixed with "$OBJTOP/"
#	  and with a trailing .$TARGET_SPEC/.$MACHINE removed,
#	- directories under the src tops that files were read from.
#	When both DPDEPS and RELDIR are set, "DPDEPS_<src> += $RELDIR"
#	lines are appended to the file named by $DPDEPS.
#
#	VAR=value arguments are exported as given.  -S, -O and -X may
#	be repeated; -X values are joined with '|' into one regex.
#
#	Exits 1 (via error) on malformed records, missing or truncated
#	filemon data, or an 'E'xec pid with no matching e'X'it record.
meta2deps() {
    DPDEPS=
    SRCTOPS=$SRCTOP
    OBJROOTS=
    EXCLUDES=
    while :
    do
	case "$1" in
	*=*) eval export "$1"; shift;;
	-a) MACHINE_ARCH=$2; shift 2;;
	-m) MACHINE=$2; shift 2;;
	-C) CURDIR=$2; shift 2;;
	-H) HOST_TARGET=$2; shift 2;;
	-S) add_list SRCTOPS $2; shift 2;;
	-O) add_list OBJROOTS $2; shift 2;;
	# the '|' separator must precede the list name; add_list
	# only looks for it ahead of the name.
	-X) add_list '|' EXCLUDES $2; shift 2;;
	-R) RELDIR=$2; shift 2;;
	-T) TARGET_SPEC=$2; shift 2;;
	*) break;;
	esac
    done

    # _ht is only set for MACHINE=host; clear it first so a stale
    # or inherited value cannot affect the suffix stripping below.
    _ht= _o=
    case "$MACHINE" in
    host) _ht=$HOST_TARGET;;
    esac

    # strip any trailing $HOST_TARGET/$TARGET_SPEC/$MACHINE component
    # from each objroot, collecting the results in _o
    for o in $OBJROOTS
    do
	# NOTE(review): the word being matched starts with $MACHINE,
	# so the '*$MACHINE*' arm looks like it always matches and the
	# final '*)' arm is never reached - confirm intent.
	case "$MACHINE,/$o/" in
	host,*$HOST_TARGET*) ;;
	*$MACHINE*|*${TARGET_SPEC:-$MACHINE}*) ;;
	*) add_list _o $o; continue;;
	esac
	for x in $_ht $TARGET_SPEC $MACHINE
	do
	    case "$o" in
	    "") continue;;	# already handled
	    */$x/) add_list _o ${o%$x/}; o=;;
	    */$x) add_list _o ${o%$x}; o=;;
	    *$x/) add_list _o ${o%$x/}; o=;;
	    *$x) add_list _o ${o%$x}; o=;;
	    esac
	done
    done
    OBJROOTS="$_o"

    # default OBJTOP from the first objroot
    case "$OBJTOP" in
    "")
	for o in $OBJROOTS
	do
	    OBJTOP=$o${TARGET_SPEC:-$MACHINE}
	    break
	done
	;;
    esac
    # src_re and obj_re are '|' separated glob lists, used unquoted
    # as case patterns below
    src_re=
    obj_re=
    add_list '|' -s '/*' src_re $SRCTOPS
    add_list '|' -s '*' obj_re $OBJROOTS

    # DPDEPS output needs RELDIR
    [ -z "$RELDIR" ] && unset DPDEPS
    tf=/tmp/m2d$$-$USER
    rm -f $tf.*
    trap 'rm -f $tf.*; trap 0' 0

    > $tf.dirdep
    > $tf.qual
    > $tf.srcdep
    > $tf.srcrel
    > $tf.dpdeps

    seenit=
    seensrc=
    lpid=
    case "$EXCLUDES" in
    "") _excludes=cat;;
    *) _excludes=_excludes_f;;
    esac
    # handle @list files
    # The records are then reduced by sed to the lines we care about
    # (CWD is rewritten to look like 'C C cwd', single quotes are
    # removed), filtered through $_excludes and processed by the
    # subshell loop, which writes obj dirs to $tf.dirdep.
    case "$@" in
    *@[!.]*)
	for f in "$@"
	do
	    case "$f" in
	    *.meta) cat $f;;
	    @*) xargs cat < ${f#@};;
	    *) cat $f;;
	    esac
	done
	;;
    *) cat /dev/null "$@";;
    esac 2> /dev/null |
    sed -e 's,^CWD,C C,;/^[#CREFLMVWX] /!d' -e "s,',,g" |
    $_excludes | ( version=no epids= xpids= eof_token=no
    while read op pid path path2
    do
	: op=$op pid=$pid path=$path path2=$path2
	# first a sanity check - filemon on Linux is not very reliable
	# path2 should only be non-empty for op L or M
	# and it should not contain spaces.
	case "$op,$path2" in
	\#*) ;;			# ok
	[LM],) error "missing path2 in: '$op $pid $path'";;
	[LMX],*" "*) error "wrong number of words in: '$op $pid $path $path2'";;
	*,|[LMX],*) ;;		# ok
	*) error "wrong number of words in: '$op $pid $path $path2'";;
	esac
	# we track cwd and ldir (of interest) per pid
	# CWD is bmake's cwd
	case "$lpid,$pid" in
	,C) CWD=$path cwd=$path ldir=$path
	    if [ -z "$SB" ]; then
		SB=`echo $CWD | sed 's,/obj.*,,'`
	    fi
	    SRCTOP=${SRCTOP:-$SB/src}
	    # version is reset to 0 at each CWD record and set by the
	    # 'V' record; still 0 here means the previous meta file
	    # had no filemon data.
	    case "$version" in
	    no) ;;		# ignore
	    0) error "no filemon data";;
	    *) ;;
	    esac
	    version=0
	    # likewise eof_token is set to 1 by the '# ... bye' record
	    case "$eof_token" in
	    no) ;;		# ignore
	    0) error "truncated filemon data";;
	    esac
	    eof_token=0
	    continue
	    ;;
	$pid,$pid) ;;
	# NOTE(review): this arm needs "$lpid,$pid" to start with a
	# digit, but lpid starts empty and is only assigned inside this
	# arm, so it looks unreachable (perhaps '*,[1-9]*' was meant)
	# - confirm before changing.
	[1-9]*)
	    case "$lpid" in
	    "") ;;
	    *) eval ldir_$lpid=$ldir;;
	    esac
	    eval ldir=\${ldir_$pid:-$CWD} cwd=\${cwd_$pid:-$CWD}
	    lpid=$pid
	    ;;
	esac

	: op=$op path=$path
	case "$op,$path" in
	V,*) version=$pid; continue;;
	W,*srcrel|*.dirdep) continue;;
	C,*)
	    case "$path" in
	    /*) cwd=$path;;
	    *) cwd=`cd $cwd/$path 2> /dev/null && /bin/pwd`;;
	    esac
	    # watch out for temp dirs that no longer exist
	    test -d ${cwd:-/dev/null/no/such} || cwd=$CWD
	    eval cwd_$pid=$cwd
	    continue
	    ;;
	F,*) # $path is new pid
	    eval cwd_$path=$cwd ldir_$path=$ldir
	    continue
	    ;;
	\#,bye) eof_token=1; continue;;
	\#*) continue;;
	*)  dir=${path%/*}
	    case "$op" in
	    E)	# setid apps get no tracing so we won't see eXit
		case `'ls' -l $path 2> /dev/null | sed 's, .*,,'` in
		*s*) ;;
		*) epids="$epids $pid";;
		esac
		;;
	    X) xpids="$xpids $pid"; continue;;
	    esac
	    case "$path" in
	    $src_re|$obj_re) ;;
	    /*/stage/*) ;;
	    /*) continue;;	# absolute but not of interest
	    *)
		# relative path: try it against ldir then cwd
		rlist="$ldir/$path $cwd/$path"
		case "$op,$path" in
		[ML],../*) rlist="$rlist $path2/$path `dirname $path2`/$path";;
		esac
		for path in $rlist
		do
		    test -e $path && break
		done
		dir=${path%/*}
		;;
	    esac
	    ;;
	esac
	# avoid repeating ourselves...
	case "$DPDEPS,$seensrc," in
	,*)
	    case ",$seenit," in
	    *,$dir,*) continue;;
	    esac
	    ;;
	*,$path,*) continue;;
	esac
	# canonicalize if needed
	case "/$dir/" in
	*/../*|*/./*)
	    rdir=$dir
	    dir=`cd $dir 2> /dev/null && /bin/pwd`
	    seen="$rdir,$dir"
	    ;;
	*)  seen=$dir;;
	esac
	case "$dir" in
	${CURDIR:-.}|"") continue;;
	$src_re)
	    # avoid repeating ourselves...
	    case "$DPDEPS,$seensrc," in
	    ,*)
		case ",$seenit," in
		*,$dir,*) continue;;
		esac
		;;
	    esac
	    ;;
	*)
	    case ",$seenit," in
	    *,$dir,*) continue;;
	    esac
	    ;;
	esac
	# a directory only updates ldir for later relative paths
	if [ -d $path ]; then
	    case "$path" in
	    */..) ldir=${dir%/*};;
	    *) ldir=$path;;
	    esac
	    continue
	fi
	[ -f $path ] || continue
	case "$dir" in
	$CWD) continue;;		# ignore
	$src_re)
	    seenit="$seenit,$seen"
	    echo $dir >> $tf.srcdep
	    # NOTE(review): $reldir is not assigned anywhere in this
	    # function (RELDIR is); only an empty $DPDEPS matters to
	    # the ',*' pattern.
	    case "$DPDEPS,$reldir,$seensrc," in
	    ,*) ;;
	    *)	seensrc="$seensrc,$path"
		echo "DPDEPS_$dir/${path##*/} += $RELDIR" >> $tf.dpdeps
		;;
	    esac
	    continue
	    ;;
	esac
	# if there is a .dirdep we cannot skip
	# just because we've seen the dir before.
	if [ -s $path.dirdep ]; then
	    # this file contains:
	    # '# ${RELDIR}.<machine>'
	    echo $path.dirdep >> $tf.qual
	    continue
	elif [ -s $dir.dirdep ]; then
	    echo $dir.dirdep >> $tf.qual
	    seenit="$seenit,$seen"
	    continue
	fi
	seenit="$seenit,$seen"
	case "$dir" in
	$obj_re)
	    echo $dir;;
	esac
    done > $tf.dirdep
    # checks for the last meta file processed
    : version=$version
    case "$version" in
    0) error "no filemon data";;
    esac
    : eof_token=$eof_token
    case "$eof_token" in
    0) error "truncated filemon data";;
    esac
    # every pid we recorded an 'E'xec for must have an e'X'it
    for p in $epids
    do
	: p=$p
	case " $xpids " in
	*" $p "*) ;;
	*) error "missing eXit for pid $p";;
	esac
    done ) || exit 1
    _nl=echo
    for f in $tf.dirdep $tf.qual $tf.srcdep
    do
	[ -s $f ] || continue
	case $f in
	*qual) # a list of .dirdep files
	    # we can prefix everything with $OBJTOP to
	    # tell gendirdeps.mk that these are
	    # DIRDEP entries, since they are already
	    # qualified with .<machine> as needed.
	    # We strip .$MACHINE though
	    xargs cat < $f | sort -u |
	    sed "s,^# ,,;s,^,$OBJTOP/,;s,\.${TARGET_SPEC:-$MACHINE}\$,,;s,\.$MACHINE\$,,"
	    ;;
	*)  sort -u $f;;
	esac
	_nl=:
    done
    if [ -s $tf.dpdeps ]; then
	case "$DPDEPS" in
	*/*) ;;
	*) echo > $DPDEPS;;		# the echo is needed!
	esac
	sort -u $tf.dpdeps |
	sed "s,${SRCTOP}/,,;s,${SB_BACKING_SB:-$SB}/src/,," >> $DPDEPS
    fi
    # ensure we produce _something_ else egrep -v gets upset
    $_nl
}
468
# Dispatch on how we were invoked: run the function whose name
# begins a path component of $0, passing all arguments through.
# Nothing is run when $0 matches none of them.
case "/$0" in
*/meta2dep*)
    meta2deps "$@"
    ;;
*/meta2dirs*)
    meta2dirs "$@"
    ;;
*/meta2src*)
    meta2src "$@"
    ;;
esac
474