xref: /freebsd/share/mk/meta2deps.sh (revision 2008043f386721d58158e37e0d7e50df8095942d)
1#!/bin/sh
2
3# NAME:
4#	meta2deps.sh - extract useful info from .meta files
5#
6# SYNOPSIS:
7#	meta2deps.sh SB="SB" "meta" ...
8#
9# DESCRIPTION:
10#	This script looks at each "meta" file and extracts the
11#	information needed to deduce build and src dependencies.
12#
13#	To do this, we extract the 'CWD' record as well as all the
14#	syscall traces which describe 'R'ead, 'C'hdir and 'E'xec
15#	syscalls.
16#
17#	The typical meta file looks like::
18#.nf
19#
20#	# Meta data file "path"
21#	CMD "command-line"
22#	CWD "cwd"
23#	TARGET "target"
24#	-- command output --
25#	-- filemon acquired metadata --
26#	# buildmon version 2
27#	V 2
28#	E "pid" "path"
29#	R "pid" "path"
30#	C "pid" "cwd"
31#	R "pid" "path"
32#	X "pid" "status"
33#.fi
34#
35#	The fact that all the syscall entry lines start with a single
36#	character makes these files quite easy to process using sed(1).
37#
38#	To simplify the logic the 'CWD' line is made to look like a
39#	normal 'C'hdir entry, and "cwd" is remembered so that it can
40#	be prefixed to any "path" which is not absolute.
41#
42#	If the "path" being read ends in '.srcrel' it is the content
43#	of (actually the first line of) that file that we are
44#	interested in.
45#
46#	Any "path" which lies outside of the sandbox "SB" is generally
47#	not of interest and is ignored.
48#
49#	The output is a set of absolute paths with "SB" like:
50#.nf
51#
52#	$SB/obj-i386/bsd/gnu/lib/csu
53#	$SB/obj-i386/bsd/gnu/lib/libgcc
54#	$SB/obj-i386/bsd/include
55#	$SB/obj-i386/bsd/lib/csu/i386-elf
56#	$SB/obj-i386/bsd/lib/libc
57#	$SB/src/bsd/include
58#	$SB/src/bsd/sys/i386/include
59#	$SB/src/bsd/sys/sys
60#	$SB/src/pan-release/rtsock
61#	$SB/src/pfe-shared/include/jnx
62#.fi
63#
64#	Which can then be further processed by 'gendirdeps.mk'
65#
66#	If we are passed 'DPDEPS='"dpdeps", then for each src file
67#	outside of "CURDIR" we read, we output a line like:
68#.nf
69#
70#	DPDEPS_$path += $RELDIR
71#.fi
72#
73#	with "$path" getting turned into reldir's, so that we can end
74#	up with a list of all the directories which depend on each src
75#	file in another directory.  This can allow for efficient yet
76#	complete testing of changes.
77
78
79# RCSid:
80#	$Id: meta2deps.sh,v 1.20 2023/01/18 01:35:24 sjg Exp $
81
82# Copyright (c) 2010-2013, Juniper Networks, Inc.
83# All rights reserved.
84#
85# Redistribution and use in source and binary forms, with or without
86# modification, are permitted provided that the following conditions
87# are met:
88# 1. Redistributions of source code must retain the above copyright
89#    notice, this list of conditions and the following disclaimer.
90# 2. Redistributions in binary form must reproduce the above copyright
91#    notice, this list of conditions and the following disclaimer in the
92#    documentation and/or other materials provided with the distribution.
93#
94# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
95# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
96# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
97# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
98# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
99# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
100# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
101# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
102# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
103# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
104# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
105
# meta2src meta ...
#	Print the sorted, unique paths of the .c, .h, .y and .l
#	files which the given meta files record as 'R'ead.
meta2src() {
    # /dev/null ensures cat always has a file operand, even
    # when we are given no meta files.
    cat /dev/null "$@" |
	sed -n -e '/^R .*\.[chyl]$/!d' -e 's,^..[0-9]* ,,p' |
	sort -u
}
111
# meta2dirs meta ...
#	Print the sorted, unique directories of the files which the
#	given meta files record as 'R'ead.  Only paths containing a
#	'/' and whose last component has a '.' followed by a lower
#	case letter or digit are considered.
meta2dirs() {
    # /dev/null ensures cat always has a file operand, even
    # when we are given no meta files.
    cat /dev/null "$@" |
	sed -n \
	    -e '/^R .*\/.*\.[a-z0-9][^\/]*$/!d' \
	    -e 's,^..[0-9]* \(.*\)/[^/]*$,\1,p' |
	sort -u
}
117
# add_list ['|'] [-s suffix] name item ...
#	Append each "item" (with "suffix" appended, if -s is given)
#	to the list held in the variable called "name", skipping
#	any item the list already contains.
#	List entries are separated by a space, or by '|' if that
#	is given.  '|' and -s are only recognized before "name".
#
#	Note: sep, suffix, name, list and top are plain (global)
#	variables and are left set on return.
add_list() {
    sep=' '
    suffix=
    while :
    do
	case "$1" in
	"|") sep="$1"; shift;;
	-s) suffix="$2"; shift 2;;
	*) break;;
	esac
    done
    name=$1
    shift
    eval list="\$$name"
    for top in "$@"
    do
	# wrap both sides in $sep so we only match whole entries
	case "$sep$list$sep" in
	*"$sep$top$suffix$sep"*) continue;;
	esac
	list="${list:+$list$sep}$top$suffix"
    done
    # Assign by reference: expanding $list into the eval'd text
    # would re-interpret any '$', '`', '"' or '\' it contains.
    eval "$name=\$list"
}
141
# Some Linux systems have deprecated egrep in favor of grep -E,
# but not everyone supports that.
# Probe egrep: if what it prints (stderr included) is anything
# other than the expected match, provide an egrep function
# which calls grep -E instead.
if [ "$(echo bmake | egrep 'a|b' 2>&1)" != bmake ]; then
    egrep() { grep -E "$@"; }
fi
148
# _excludes_f
#	Filter: copy stdin to stdout, dropping every line which
#	matches the extended regular expression in $EXCLUDES.
_excludes_f() {
    egrep -v "${EXCLUDES}"
}
152
# error message ...
#	Print "ERROR: " followed by the arguments (joined into one
#	string) on stderr, then exit with status 1.
#	Note that this exits the current (sub)shell, it does not
#	return to the caller.
error() {
    # "$*" rather than "$@": we want a single string here, and
    # printf so that backslashes in the message are not subject
    # to the varying interpretations of echo.
    printf 'ERROR: %s\n' "$*" >&2
    exit 1
}
157
# meta2deps [VAR=value ...] [-a arch] [-m machine] [-C curdir]
#	[-H host_target] [-S srctop] [-O objroot] [-X exclude]
#	[-R reldir] [-T target_spec] meta ...
#
#	Read the given "meta" files (an argument of the form @file
#	names a file which lists meta files) and print the sorted,
#	unique set of directories found:
#	obj dirs which were read from, the content of any .dirdep
#	files found (prefixed with $OBJTOP/ and with the machine
#	qualifier stripped) and src dirs which were read from.
#
#	VAR=value	exported to the environment (eg. SB, DPDEPS)
#	-a arch		sets MACHINE_ARCH
#	-m machine	sets MACHINE
#	-C curdir	sets CURDIR
#	-H host_target	sets HOST_TARGET
#	-S srctop	added to SRCTOPS
#	-O objroot	added to OBJROOTS
#	-X exclude	added to EXCLUDES, a '|' separated egrep
#			expression; input lines matching it are dropped
#	-R reldir	sets RELDIR
#	-T target_spec	sets TARGET_SPEC
#
#	If DPDEPS and RELDIR are set, 'DPDEPS_<src> += $RELDIR'
#	lines are appended to the file $DPDEPS.
#
#	Exits with status 1 (via error) if a meta file has no
#	filemon data, has truncated filemon data, or if we saw an
#	'E'xec for a pid with no matching e'X'it.
meta2deps() {
    DPDEPS=
    SRCTOPS=$SRCTOP
    OBJROOTS=
    EXCLUDES=
    while :
    do
	case "$1" in
	*=*) eval export "$1"; shift;;
	-a) MACHINE_ARCH=$2; shift 2;;
	-m) MACHINE=$2; shift 2;;
	-C) CURDIR=$2; shift 2;;
	-H) HOST_TARGET=$2; shift 2;;
	-S) add_list SRCTOPS $2; shift 2;;
	-O) add_list OBJROOTS $2; shift 2;;
	# the '|' must precede the list name, add_list only
	# treats it as the separator in that position
	-X) add_list '|' EXCLUDES $2; shift 2;;
	-R) RELDIR=$2; shift 2;;
	-T) TARGET_SPEC=$2; shift 2;;
	*) break;;
	esac
    done

    # make sure we do not inherit these
    _ht= _o=
    case "$MACHINE" in
    host) _ht=$HOST_TARGET;;
    esac

    # strip any trailing machine specific component from each
    # objroot, so that OBJROOTS are the common prefixes.
    for o in $OBJROOTS
    do
	case "$MACHINE,/$o/" in
	host,*$HOST_TARGET*) ;;
	*$MACHINE*|*${TARGET_SPEC:-$MACHINE}*) ;;
	*) add_list _o $o; continue;;
	esac
	for x in $_ht $TARGET_SPEC $MACHINE
	do
	    # o is cleared once we have dealt with it
	    case "$o" in
	    "") continue;;
	    */$x/) add_list _o ${o%$x/}; o=;;
	    */$x) add_list _o ${o%$x}; o=;;
	    *$x/) add_list _o ${o%$x/}; o=;;
	    *$x) add_list _o ${o%$x}; o=;;
	    esac
	done
    done
    OBJROOTS="$_o"

    # if not given, OBJTOP is derived from the first objroot
    case "$OBJTOP" in
    "")
	for o in $OBJROOTS
	do
	    OBJTOP=$o${TARGET_SPEC:-$MACHINE}
	    break
	done
	;;
    esac
    # these are used (unquoted) as case patterns below
    src_re=
    obj_re=
    add_list '|' -s '/*' src_re $SRCTOPS
    add_list '|' -s '*' obj_re $OBJROOTS

    # DPDEPS is of no use without RELDIR
    [ -z "$RELDIR" ] && unset DPDEPS
    # NOTE(review): predictable name in /tmp, mktemp(1) would be safer
    tf=/tmp/m2d$$-$USER
    rm -f $tf.*
    trap 'rm -f $tf.*; trap 0' 0

    > $tf.dirdep
    > $tf.qual
    > $tf.srcdep
    > $tf.srcrel
    > $tf.dpdeps

    seenit=
    seensrc=
    lpid=
    case "$EXCLUDES" in
    "") _excludes=cat;;
    *) _excludes=_excludes_f;;
    esac
    # handle @list files
    case "$*" in
    *@[!.]*)
	for f in "$@"
	do
	    case "$f" in
	    *.meta) cat $f;;
	    @*) xargs cat < ${f#@};;
	    *) cat $f;;
	    esac
	done
	;;
    *) cat /dev/null "$@";;
    esac 2> /dev/null |
    sed -e 's,^CWD,C C,;/^[#CREFLMVX] /!d' -e "s,',,g" |
    $_excludes | ( version=no epids= xpids= eof_token=no
    while read op pid path junk
    do
	: op=$op pid=$pid path=$path
	# we track cwd and ldir (of interest) per pid
	# CWD is bmake's cwd
	case "$lpid,$pid" in
	,C) # the 'CWD' record (now 'C C cwd'), ie. a new meta file
	    CWD=$path cwd=$path ldir=$path
	    if [ -z "$SB" ]; then
		SB=`echo $CWD | sed 's,/obj.*,,'`
	    fi
	    SRCTOP=${SRCTOP:-$SB/src}
	    # check that the previous meta file (if any) was ok;
	    # 'no' means there was no previous meta file.
	    case "$version" in
	    no) ;;		# ignore
	    0) error "no filemon data";;
	    *) ;;
	    esac
	    version=0
	    case "$eof_token" in
	    no) ;;		# ignore
	    0) error "truncated filemon data";;
	    esac
	    eof_token=0
	    continue
	    ;;
	$pid,$pid) ;;
	# NOTE(review): while lpid is empty the word above starts
	# with ',' so it cannot match this pattern, and lpid is only
	# ever set within this arm - confirm what was intended.
	[1-9]*)
	    case "$lpid" in
	    "") ;;
	    *) eval ldir_$lpid=$ldir;;
	    esac
	    eval ldir=\${ldir_$pid:-$CWD} cwd=\${cwd_$pid:-$CWD}
	    lpid=$pid
	    ;;
	esac

	: op=$op path=$path
	case "$op,$path" in
	V,*) version=$pid; continue;;
	W,*srcrel|*.dirdep) continue;;
	C,*)
	    case "$path" in
	    /*) cwd=$path;;
	    *) cwd=`cd $cwd/$path 2> /dev/null && /bin/pwd`;;
	    esac
	    # watch out for temp dirs that no longer exist
	    test -d ${cwd:-/dev/null/no/such} || cwd=$CWD
	    eval cwd_$pid=$cwd
	    continue
	    ;;
	F,*) # $path is new pid
	    eval cwd_$path=$cwd ldir_$path=$ldir
	    continue
	    ;;
	\#,bye) eof_token=1; continue;;
	\#*) continue;;
	*)  dir=${path%/*}
	    case "$op" in
	    E)	# setid apps get no tracing so we won't see eXit
		case `'ls' -l $path 2> /dev/null | sed 's, .*,,'` in
		*s*) ;;
		*) epids="$epids $pid";;
		esac
		;;
	    X) xpids="$xpids $pid"; continue;;
	    esac
	    case "$path" in
	    $src_re|$obj_re) ;;
	    /*/stage/*) ;;
	    /*) continue;;
	    *)	# relative, try ldir then cwd
		for path in $ldir/$path $cwd/$path
		do
			test -e $path && break
		done
		dir=${path%/*}
		;;
	    esac
	    ;;
	esac
	# avoid repeating ourselves...
	case "$DPDEPS,$seensrc," in
	,*)
	    case ",$seenit," in
	    *,$dir,*) continue;;
	    esac
	    ;;
	*,$path,*) continue;;
	esac
	# canonicalize if needed
	case "/$dir/" in
	*/../*|*/./*)
	    rdir=$dir
	    dir=`cd $dir 2> /dev/null && /bin/pwd`
	    seen="$rdir,$dir"
	    ;;
	*)  seen=$dir;;
	esac
	case "$dir" in
	${CURDIR:-.}|"") continue;;
	$src_re)
	    # avoid repeating ourselves...
	    case "$DPDEPS,$seensrc," in
	    ,*)
		case ",$seenit," in
		*,$dir,*) continue;;
		esac
		;;
	    esac
	    ;;
	*)
	    case ",$seenit," in
	    *,$dir,*) continue;;
	    esac
	    ;;
	esac
	if [ -d $path ]; then
	    # remember the last dir of interest for this pid
	    case "$path" in
	    */..) ldir=${dir%/*};;
	    *) ldir=$path;;
	    esac
	    continue
	fi
	[ -f $path ] || continue
	case "$dir" in
	$CWD) continue;;		# ignore
	$src_re)
	    seenit="$seenit,$seen"
	    echo $dir >> $tf.srcdep
	    # NOTE(review): reldir (lower case) is not set anywhere
	    # in this function, so only DPDEPS matters here.
	    case "$DPDEPS,$reldir,$seensrc," in
	    ,*) ;;
	    *)	seensrc="$seensrc,$path"
		echo "DPDEPS_$dir/${path##*/} += $RELDIR" >> $tf.dpdeps
		;;
	    esac
	    continue
	    ;;
	esac
	# if there is a .dirdep we cannot skip
	# just because we've seen the dir before.
	if [ -s $path.dirdep ]; then
	    # this file contains:
	    # '# ${RELDIR}.<machine>'
	    echo $path.dirdep >> $tf.qual
	    continue
	elif [ -s $dir.dirdep ]; then
	    echo $dir.dirdep >> $tf.qual
	    seenit="$seenit,$seen"
	    continue
	fi
	seenit="$seenit,$seen"
	case "$dir" in
	$obj_re)
	    echo $dir;;
	esac
    done > $tf.dirdep
    # now check the last meta file
    : version=$version
    case "$version" in
    0) error "no filemon data";;
    esac
    : eof_token=$eof_token
    case "$eof_token" in
    0) error "truncated filemon data";;
    esac
    # every pid we saw 'E'xec should also have e'X'ited
    for p in $epids
    do
	: p=$p
	case " $xpids " in
	*" $p "*) ;;
	*) error "missing eXit for pid $p";;
	esac
    done ) || exit 1
    _nl=echo
    for f in $tf.dirdep $tf.qual $tf.srcdep
    do
	[ -s $f ] || continue
	case $f in
	*qual) # a list of .dirdep files
	    # we can prefix everything with $OBJTOP to
	    # tell gendirdeps.mk that these are
	    # DIRDEP entries, since they are already
	    # qualified with .<machine> as needed.
	    # We strip .$MACHINE though
	    xargs cat < $f | sort -u |
	    sed "s,^# ,,;s,^,$OBJTOP/,;s,\.${TARGET_SPEC:-$MACHINE}\$,,;s,\.$MACHINE\$,,"
	    ;;
	*)  sort -u $f;;
	esac
	_nl=:
    done
    if [ -s $tf.dpdeps ]; then
	case "$DPDEPS" in
	*/*) ;;
	*) echo > $DPDEPS;;		# the echo is needed!
	esac
	sort -u $tf.dpdeps |
	sed "s,${SRCTOP}/,,;s,${SB_BACKING_SB:-$SB}/src/,," >> $DPDEPS
    fi
    # ensure we produce _something_ else egrep -v gets upset
    $_nl
}
452
# Run the function which corresponds to the name we were
# invoked as, passing it all our arguments.
# If the name matches none of these, nothing is run.
case "/$0" in
*/meta2dep*)
    meta2deps "$@"
    ;;
*/meta2dirs*)
    meta2dirs "$@"
    ;;
*/meta2src*)
    meta2src "$@"
    ;;
esac
458