xref: /freebsd/share/mk/meta2deps.py (revision 7a0c41d5d7d4e9770ef6f5d56f893efc8f18ab7c)
1# $FreeBSD$
2#!/usr/bin/env python
3
4from __future__ import print_function
5
6"""
7This script parses each "meta" file and extracts the
8information needed to deduce build and src dependencies.
9
10It works much the same as the original shell script, but is
11*much* more efficient.
12
13The parsing work is handled by the class MetaFile.
14We only pay attention to a subset of the information in the
15"meta" files.  Specifically:
16
17'CWD'	to initialize our notion.
18
19'C'	to track chdir(2) on a per process basis
20
21'R'	files read are what we really care about.
22	directories read, provide a clue to resolving
23	subsequent relative paths.  That is if we cannot find
24	them relative to 'cwd', we check relative to the last
25	dir read.
26
27'W'	files opened for write or read-write,
28	for filemon V3 and earlier.
29
30'E'	files executed.
31
32'L'	files linked
33
34'V'	the filemon version, this record is used as a clue
35	that we have reached the interesting bit.
36
37"""
38
39"""
40RCSid:
41	$Id: meta2deps.py,v 1.19 2016/04/02 20:45:40 sjg Exp $
42
43	Copyright (c) 2011-2013, Juniper Networks, Inc.
44	All rights reserved.
45
46	Redistribution and use in source and binary forms, with or without
47	modification, are permitted provided that the following conditions
48	are met:
49	1. Redistributions of source code must retain the above copyright
50	   notice, this list of conditions and the following disclaimer.
51	2. Redistributions in binary form must reproduce the above copyright
52	   notice, this list of conditions and the following disclaimer in the
53	   documentation and/or other materials provided with the distribution.
54
55	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
56	"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
57	LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
58	A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
59	OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
60	SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
61	LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
62	DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
63	THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
64	(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
65	OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
66
67"""
68
69import os, re, sys
70
def getv(dict, key, d=None):
    """Return dict[key] when key is present in dict, otherwise d."""
    return dict[key] if key in dict else d
76
def resolve(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Turn path into an absolute path without touching symlinks.

    A trailing '/.' is dropped first.  Absolute paths are returned as
    they are, '.' maps to cwd and './x' maps to cwd + '/x'; none of
    these consult the filesystem.  Any other relative path is looked
    for below last_dir and then below cwd, and the first candidate that
    exists is returned.  None is returned when neither exists.

    When debug is greater than 2 the search is traced on debug_out.
    """
    if path.endswith('/.'):
        path = path[:-2]
    if path[:1] == '/':
        return path
    if path == '.':
        return cwd
    if path.startswith('./'):
        return cwd + path[1:]

    # no point in probing the same directory twice
    if last_dir == cwd:
        bases = [cwd]
    else:
        bases = [last_dir, cwd]

    for base in bases:
        if not base:
            continue
        candidate = base + '/' + path
        if debug > 2:
            print("looking for:", candidate, end=' ', file=debug_out)
        if os.path.exists(candidate):
            if debug > 2:
                print("found:", candidate, file=debug_out)
            return candidate
        if debug > 2:
            print("nope", file=debug_out)
    return None
106
def abspath(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Return an absolute path for path, resolved via cwd or last_dir.

    resolve() is tried first; if it finds nothing the original path is
    used.  Because this is called a lot, os.path.realpath() is only
    applied when the result has no '/' at all, contains './' after its
    first character, ends in '/..' or is itself a symlink.
    """
    resolved = resolve(path, cwd, last_dir, debug, debug_out)
    target = resolved or path
    needs_realpath = ('/' not in target or
                      target.find('./') > 0 or
                      target.endswith('/..') or
                      os.path.islink(target))
    return os.path.realpath(target) if needs_realpath else target
122
def sort_unique(list, cmp=None, key=None, reverse=False):
    """Sort list in place and return a new list without duplicates.

    list    the list to sort; it is sorted in place (duplicates are
            kept in it).

    cmp     optional old-style comparison function taking two
            arguments.  It is wrapped with functools.cmp_to_key() so
            that it works on Python 2 and Python 3 alike; when key is
            also given, cmp compares the key values.

    key     optional function computing the sort key of an element.

    reverse sort in descending order when true.

    The returned list holds the sorted elements with every run of
    adjacent equal elements collapsed to one.
    """
    if cmp is not None:
        # list.sort() lost its cmp argument in Python 3
        from functools import cmp_to_key
        cmp_key = cmp_to_key(cmp)
        if key is None:
            key = cmp_key
        else:
            inner_key = key
            key = lambda item: cmp_key(inner_key(item))
    list.sort(key=key, reverse=reverse)

    unique = []
    for item in list:
        # equal elements are adjacent once sorted, so comparing with
        # the last one kept is enough
        if unique and item == unique[-1]:
            continue
        unique.append(item)
    return unique
132
def add_trims(x):
    """Return x wrapped as '/x/', '/x', 'x/' and 'x', in that order."""
    wrappers = (('/', '/'), ('/', ''), ('', '/'), ('', ''))
    return [lead + x + tail for lead, tail in wrappers]
138
class MetaFile:
    """Parse meta files generated by bmake and accumulate dependencies.

    Each instance handles one meta file.  The collections below are
    class attributes that are mutated in place, so what is learned from
    one meta file (seen, obj_deps, src_deps, file_deps, srctops,
    objroots) is shared with every other instance until reset() rebinds
    the result collections on a particular instance.
    """

    conf = None
    dirdep_re = None
    host_target = None
    # tops of the src tree(s), each ending in '/'
    srctops = []
    # common prefixes of the obj dirs
    objroots = []
    # path prefixes to ignore
    excludes = []
    # raw paths and directories that need no further attention
    seen = {}
    obj_deps = []
    src_deps = []
    file_deps = []

    def __init__(self, name, conf=None):
        """if name is set we will parse it now.
        conf can have the following keys:

        SRCTOPS list of tops of the src tree(s).

        CURDIR  the src directory 'bmake' was run from.

        RELDIR  the relative path from SRCTOP to CURDIR

        MACHINE the machine we built for.
                set to 'none' if we are not cross-building.
                More specifically if machine cannot be deduced from
                objdirs.

        TARGET_SPEC
                Sometimes MACHINE isn't enough.

        HOST_TARGET
                when we build for the pseudo machine 'host'
                the object tree uses HOST_TARGET rather than MACHINE.

        OBJROOTS a list of the common prefix for all obj dirs it might
                end in '/' or '-'.

        DPDEPS  names an optional file to which per file dependencies
                will be appended.
                For example if 'some/path/foo.h' is read from SRCTOP
                then 'DPDEPS_some/path/foo.h +=' "RELDIR" is output.
                This can allow 'bmake' to learn all the dirs within
                the tree that depend on 'foo.h'

        EXCLUDES
                A list of paths to ignore.
                ccache(1) can otherwise be trouble.

        debug   desired debug level

        debug_out open file to send debug output to (sys.stderr)

        """
        if conf is None:
            conf = {}

        self.name = name
        self.debug = getv(conf, 'debug', 0)
        self.debug_out = getv(conf, 'debug_out', sys.stderr)

        self.machine = getv(conf, 'MACHINE', '')
        self.machine_arch = getv(conf, 'MACHINE_ARCH', '')
        self.target_spec = getv(conf, 'TARGET_SPEC', '')
        self.curdir = getv(conf, 'CURDIR')
        self.reldir = getv(conf, 'RELDIR')
        self.dpdeps = getv(conf, 'DPDEPS')
        self.line = 0

        # NOTE: the assignment below binds conf on the instance, the
        # class attribute stays None, so this branch runs for every
        # instance.  The membership tests keep the shared srctops and
        # objroots lists free of duplicates.
        if not self.conf:
            self.conf = conf
            self.host_target = getv(conf, 'HOST_TARGET')
            for srctop in getv(conf, 'SRCTOPS', []):
                if not srctop:
                    continue            # nothing to match against
                if srctop[-1] != '/':
                    srctop += '/'
                if srctop not in self.srctops:
                    self.srctops.append(srctop)
                # also match paths reported via the real location
                _srctop = os.path.realpath(srctop)
                if _srctop[-1] != '/':
                    _srctop += '/'
                if _srctop not in self.srctops:
                    self.srctops.append(_srctop)

            trim_list = add_trims(self.machine)
            if self.machine == 'host' and self.host_target:
                trim_list += add_trims(self.host_target)
            if self.target_spec:
                trim_list += add_trims(self.target_spec)
            # every string ends with '' and objroot[0:-0] is empty, so
            # an empty suffix would wipe the objroot - never trim with it
            trim_list = [e for e in trim_list if e]

            for objroot in getv(conf, 'OBJROOTS', []):
                for e in trim_list:
                    if objroot.endswith(e):
                        # this is not what we want - fix it
                        objroot = objroot[0:-len(e)]
                        if e.endswith('/'):
                            objroot += '/'
                if not objroot:
                    continue            # nothing left after trimming
                if objroot not in self.objroots:
                    self.objroots.append(objroot)
                    _objroot = os.path.realpath(objroot)
                    if objroot[-1] == '/':
                        _objroot += '/'
                    if _objroot not in self.objroots:
                        self.objroots.append(_objroot)

            # we want the longest match
            self.srctops.sort(reverse=True)
            self.objroots.sort(reverse=True)

            self.excludes = getv(conf, 'EXCLUDES', [])

            if self.debug:
                print("host_target=", self.host_target, file=self.debug_out)
                print("srctops=", self.srctops, file=self.debug_out)
                print("objroots=", self.objroots, file=self.debug_out)
                print("excludes=", self.excludes, file=self.debug_out)

            self.dirdep_re = re.compile(r'([^/]+)/(.+)')

        if self.dpdeps and not self.reldir:
            if self.debug:
                print("need reldir:", end=' ', file=self.debug_out)
            if self.curdir:
                srctop = self.find_top(self.curdir, self.srctops)
                if srctop:
                    self.reldir = self._strip_top(self.curdir, srctop)
                    if self.debug:
                        print(self.reldir, file=self.debug_out)
            if not self.reldir:
                self.dpdeps = None      # we cannot do it?

        self.cwd = os.getcwd()          # make sure this is initialized
        self.last_dir = self.cwd

        if name:
            self.try_parse()

    @staticmethod
    def _strip_top(path, top):
        """return path without its leading top, or path unchanged
        when it does not start with top."""
        if top and path.startswith(top):
            return path[len(top):]
        return path

    def reset(self):
        """reset state if we are being passed meta files from multiple directories."""
        self.seen = {}
        self.obj_deps = []
        self.src_deps = []
        self.file_deps = []

    def dirdeps(self, sep='\n'):
        """return DIRDEPS: obj_deps joined by sep and prefixed with
        the stripped sep."""
        return sep.strip() + sep.join(self.obj_deps)

    def src_dirdeps(self, sep='\n'):
        """return SRC_DIRDEPS: src_deps joined by sep and prefixed with
        the stripped sep."""
        return sep.strip() + sep.join(self.src_deps)

    def file_depends(self, out=None):
        """Append DPDEPS_${file} += ${RELDIR}
        for each file we saw, to the output file.
        Nothing is written when reldir is not known."""
        if not self.reldir:
            return None
        for f in sort_unique(self.file_deps):
            print('DPDEPS_%s += %s' % (f, self.reldir), file=out)

    def seenit(self, dir):
        """remember that we have seen dir."""
        self.seen[dir] = 1

    def add(self, list, data, clue=''):
        """add data to list if it isn't already there.
        clue labels the debug message."""
        if data not in list:
            list.append(data)
            if self.debug:
                print("%s: %sAdd: %s" % (self.name, clue, data), file=self.debug_out)

    def find_top(self, path, list):
        """the logical tree may be split across multiple trees;
        return the first entry of list that path starts with, or None."""
        for top in list:
            if path.startswith(top):
                if self.debug > 2:
                    print("found in", top, file=self.debug_out)
                return top
        return None

    def find_obj(self, objroot, dir, path, input):
        """return path within objroot, taking care of .dirdep files

        objroot the entry of objroots that dir starts with.
        dir     the directory holding path.
        path    the file that was accessed.
        input   the raw path from the meta file record.

        The first line of path + '.dirdep' or dir + '/.dirdep' names
        the directory if either exists.  Otherwise it is deduced from
        dir, which is expected to look like objroot + machine/reldir.
        Returns None if nothing could be deduced.
        """
        ddep = None
        for ddepf in [path + '.dirdep', dir + '/.dirdep']:
            if not ddep and os.path.exists(ddepf):
                with open(ddepf, 'r') as f:
                    ddep = f.readline().strip('# \n')
                if self.debug > 1:
                    print("found %s: %s\n" % (ddepf, ddep), file=self.debug_out)
                # drop a trailing '.machine' (or '.target_spec'); an
                # empty machine must not chop the last character
                if self.machine and ddep.endswith(self.machine):
                    ddep = ddep[0:-(1+len(self.machine))]
                elif self.target_spec and ddep.endswith(self.target_spec):
                    ddep = ddep[0:-(1+len(self.target_spec))]

        if not ddep:
            # no .dirdeps, so remember that we've seen the raw input
            self.seenit(input)
            self.seenit(dir)
            if self.machine == 'none':
                if dir.startswith(objroot):
                    return self._strip_top(dir, objroot)
                return None
            m = self.dirdep_re.match(self._strip_top(dir, objroot))
            if m:
                ddep = m.group(2)
                dmachine = m.group(1)
                if dmachine != self.machine:
                    if not (self.machine == 'host' and
                            dmachine == self.host_target):
                        if self.debug > 2:
                            print("adding .%s to %s" % (dmachine, ddep), file=self.debug_out)
                        ddep += '.' + dmachine

        return ddep

    def try_parse(self, name=None, file=None):
        """give file and line number causing exception"""
        try:
            self.parse(name, file)
        except BaseException:
            # give a useful clue, the exception itself is re-raised
            print('{}:{}: '.format(self.name, self.line), end=' ', file=sys.stderr)
            raise

    def parse(self, name=None, file=None):
        """A meta file looks like:

        # Meta data file "path"
        CMD "command-line"
        CWD "cwd"
        TARGET "target"
        -- command output --
        -- filemon acquired metadata --
        # buildmon version 3
        V 3
        C "pid" "cwd"
        E "pid" "path"
        F "pid" "child"
        R "pid" "path"
        W "pid" "path"
        X "pid" "status"
        D "pid" "path"
        L "pid" "src" "target"
        M "pid" "old" "new"
        S "pid" "path"
        # Bye bye

        We go to some effort to avoid processing a dependency more than once.
        Of the above record types only C,E,F,L,R,V and W are of interest.

        name    if set, replaces self.name.
        file    an open file to read instead of opening self.name;
                it is left open for the caller.
        """

        version = 0                     # unknown
        if name:
            self.name = name
        if file:
            f = file
            cwd = self.last_dir = self.cwd
        else:
            f = open(self.name, 'r')
            cwd = self.cwd
        skip = True
        pid_cwd = {}
        pid_last_dir = {}
        last_pid = 0

        self.line = 0
        if self.curdir:
            self.seenit(self.curdir)    # we ignore this

        interesting = 'CEFLRV'
        try:
            for line in f:
                self.line += 1
                # ignore anything we don't care about
                if line[0] not in interesting:
                    continue
                if self.debug > 2:
                    print("input:", line, end=' ', file=self.debug_out)
                w = line.split()

                if skip:
                    if w[0] == 'V':
                        skip = False
                        # not used further; it would only matter if
                        # 'W' records (which may be 'rw' before
                        # version 4) were added to interesting
                        version = int(w[1])
                    elif w[0] == 'CWD':
                        self.cwd = cwd = self.last_dir = w[1]
                        self.seenit(cwd)    # ignore this
                        if self.debug:
                            print("%s: CWD=%s" % (self.name, cwd), file=self.debug_out)
                    continue

                pid = int(w[1])
                if pid != last_pid:
                    # save the state of the process we are leaving
                    # and pick up that of the one we are switching to
                    if last_pid:
                        pid_cwd[last_pid] = cwd
                        pid_last_dir[last_pid] = self.last_dir
                    cwd = pid_cwd.get(pid, self.cwd)
                    self.last_dir = pid_last_dir.get(pid, self.cwd)
                    last_pid = pid

                # process operations
                if w[0] == 'F':
                    # the child starts out in the cwd of its parent
                    npid = int(w[2])
                    pid_cwd[npid] = cwd
                    pid_last_dir[npid] = cwd
                    last_pid = npid
                    continue
                elif w[0] == 'C':
                    cwd = abspath(w[2], cwd, None, self.debug, self.debug_out)
                    if cwd.endswith('/.'):
                        cwd = cwd[0:-2]
                    self.last_dir = cwd
                    if self.debug > 1:
                        print("cwd=", cwd, file=self.debug_out)
                    continue

                if w[2] in self.seen:
                    if self.debug > 2:
                        print("seen:", w[2], file=self.debug_out)
                    continue
                # file operations
                if w[0] in 'ML':
                    # these are special, treat src as read and
                    # target as write.  w[1] is the pid, so the
                    # paths are w[2] and w[3].
                    self.parse_path(w[2].strip("'"), cwd, 'R', w)
                    if len(w) > 3:
                        self.parse_path(w[3].strip("'"), cwd, 'W', w)
                    continue
                elif w[0] in 'ERWS':
                    path = w[2]
                    self.parse_path(path, cwd, w[0], w)
        finally:
            # only close what we opened ourselves
            if not file:
                f.close()

    def parse_path(self, path, cwd, op=None, w=None):
        """look at a path for the op specified

        path    the path named by the record.
        cwd     the current directory of the process concerned.
        op      the operation; defaults to w[0].
        w       the split record the path came from; w[2] is what gets
                remembered as seen.  When it is missing path is used.
        """
        if w is None:
            w = []
        if not op:
            op = w[0]
        raw = w[2] if len(w) > 2 else path

        # we are never interested in .dirdep files as dependencies
        if path.endswith('.dirdep'):
            return
        for p in self.excludes:
            if p and path.startswith(p):
                if self.debug > 2:
                    print("exclude:", p, path, file=self.debug_out)
                return
        # we don't want to resolve the last component if it is
        # a symlink
        path = resolve(path, cwd, self.last_dir, self.debug, self.debug_out)
        if not path:
            return
        dir, base = os.path.split(path)
        if dir in self.seen:
            if self.debug > 2:
                print("seen:", dir, file=self.debug_out)
            return
        # we can have a path in an objdir which is a link
        # to the src dir, we may need to add dependencies for each
        rdir = dir
        dir = abspath(dir, cwd, self.last_dir, self.debug, self.debug_out)
        if rdir == dir or rdir.find('./') > 0:
            rdir = None
        # now put path back together
        path = '/'.join([dir, base])
        if self.debug > 1:
            print("raw=%s rdir=%s dir=%s path=%s" % (raw, rdir, dir, path), file=self.debug_out)
        if op in 'RWS':
            if path in [self.last_dir, cwd, self.cwd, self.curdir]:
                if self.debug > 1:
                    print("skipping:", path, file=self.debug_out)
                return
            if os.path.isdir(path):
                # a directory is only a clue for later relative paths
                if op in 'RW':
                    self.last_dir = path
                if self.debug > 1:
                    print("ldir=", self.last_dir, file=self.debug_out)
                return

        if op in 'ERW':
            # finally, we get down to it
            if dir == self.cwd or dir == self.curdir:
                return
            srctop = self.find_top(path, self.srctops)
            if srctop:
                if self.dpdeps:
                    self.add(self.file_deps, self._strip_top(path, srctop), 'file')
                self.add(self.src_deps, self._strip_top(dir, srctop), 'src')
                self.seenit(raw)
                self.seenit(dir)
                if rdir and not rdir.startswith(srctop):
                    dir = rdir      # for below
                    rdir = None
                else:
                    return

            objroot = None
            # note that dir is rebound by this loop and keeps the
            # last value tried when nothing matches
            for dir in [dir, rdir]:
                if not dir:
                    continue
                objroot = self.find_top(dir, self.objroots)
                if objroot:
                    break
            if objroot:
                ddep = self.find_obj(objroot, dir, path, raw)
                if ddep:
                    self.add(self.obj_deps, ddep, 'obj')
            else:
                # don't waste time looking again
                self.seenit(raw)
                self.seenit(dir)
552
553
def main(argv, klass=MetaFile, xopts='', xoptf=None):
    """Simple driver for class MetaFile.

    Usage:
        script [options] [key=value ...] "meta" ...

    Options and key=value pairs contribute to the
    dictionary passed to MetaFile.

    -S "SRCTOP"
                add "SRCTOP" to the "SRCTOPS" list.

    -C "CURDIR"

    -O "OBJROOT"
                add "OBJROOT" to the "OBJROOTS" list.

    -R "RELDIR"

    -m "MACHINE"

    -a "MACHINE_ARCH"

    -H "HOST_TARGET"

    -T "TARGET_SPEC"

    -D "DPDEPS"

    -X "EXCLUDE"
                add "EXCLUDE" to the "EXCLUDES" list.

    -q  do not print the results

    -d  bumps debug level

    MACHINE and MACHINE_ARCH default to the environment variables of
    the same name, and SB_SRC and SB_OBJROOT from the environment seed
    the SRCTOPS and OBJROOTS lists.

    Arguments ending in '.meta' are parsed if they exist, and an
    argument of the form @file names a file listing meta files.

    klass   the class to instantiate for each meta file.
    xopts   extra getopt option letters.
    xoptf   called as xoptf(option, value, conf) for every option that
            is not handled here.

    Returns the last klass instance created, or None.
    """
    import getopt

    # import Psyco if we can
    # it can speed things up quite a bit
    have_psyco = 0
    try:
        import psyco
        psyco.full()
        have_psyco = 1
    except Exception:
        # purely an optimization, carry on without it
        pass

    conf = {
        'SRCTOPS': [],
        'OBJROOTS': [],
        'EXCLUDES': [],
        }

    # look each variable up on its own so that one being unset does
    # not stop the others from being used
    for var in ('MACHINE', 'MACHINE_ARCH'):
        value = os.environ.get(var)
        if value:
            conf[var] = value
    for var, key in (('SB_SRC', 'SRCTOPS'), ('SB_OBJROOT', 'OBJROOTS')):
        value = os.environ.get(var)
        if value:
            conf[key].append(value)

    debug = 0
    output = True

    opts, args = getopt.getopt(argv[1:], 'a:dS:C:O:R:m:D:H:qT:X:' + xopts)
    for o, a in opts:
        if o == '-a':
            conf['MACHINE_ARCH'] = a
        elif o == '-d':
            debug += 1
        elif o == '-q':
            output = False
        elif o == '-H':
            conf['HOST_TARGET'] = a
        elif o == '-S':
            if a not in conf['SRCTOPS']:
                conf['SRCTOPS'].append(a)
        elif o == '-C':
            conf['CURDIR'] = a
        elif o == '-O':
            if a not in conf['OBJROOTS']:
                conf['OBJROOTS'].append(a)
        elif o == '-R':
            conf['RELDIR'] = a
        elif o == '-D':
            conf['DPDEPS'] = a
        elif o == '-m':
            conf['MACHINE'] = a
        elif o == '-T':
            conf['TARGET_SPEC'] = a
        elif o == '-X':
            if a not in conf['EXCLUDES']:
                conf['EXCLUDES'].append(a)
        elif xoptf:
            xoptf(o, a, conf)

    conf['debug'] = debug

    # get any var=val assignments, they must precede the meta files
    consumed = 0
    for a in args:
        if a.find('=') <= 0:
            break
        # split on the first '=' only, the value may contain more
        k, v = a.split('=', 1)
        if k in ['SRCTOP', 'OBJROOT', 'SRCTOPS', 'OBJROOTS']:
            if k == 'SRCTOP':
                k = 'SRCTOPS'
            elif k == 'OBJROOT':
                k = 'OBJROOTS'
            if v not in conf[k]:
                conf[k].append(v)
        else:
            conf[k] = v
        consumed += 1
    del args[:consumed]

    debug_out = getv(conf, 'debug_out', sys.stderr)

    if debug:
        print("config:", file=debug_out)
        print("psyco=", have_psyco, file=debug_out)
        for k, v in conf.items():
            print("%s=%s" % (k, v), file=debug_out)

    m = None
    for a in args:
        if a.endswith('.meta'):
            if not os.path.exists(a):
                continue
            m = klass(a, conf)
        elif a.startswith('@'):
            # there can actually multiple files per line
            with open(a[1:]) as flist:
                for line in flist:
                    for f in line.strip().split():
                        if not os.path.exists(f):
                            continue
                        m = klass(f, conf)

    if output and m:
        print(m.dirdeps())

        print(m.src_dirdeps('\nsrc:'))

        dpdeps = getv(conf, 'DPDEPS')
        if dpdeps:
            # text mode: print() cannot write to a binary file on
            # Python 3
            with open(dpdeps, 'w') as out:
                m.file_depends(out)

    return m
706
if __name__ == '__main__':
    try:
        main(sys.argv)
    except:
        # yes, this goes to stdout; the exception is then re-raised
        # unchanged
        failure = sys.exc_info()[1]
        print("ERROR: ", failure)
        raise
714
715