xref: /freebsd/share/mk/meta2deps.py (revision 9268022b74279434ed6300244e3f977e56a8ceb5)
1#!/usr/bin/env python
2
3from __future__ import print_function
4
5"""
6This script parses each "meta" file and extracts the
7information needed to deduce build and src dependencies.
8
9It works much the same as the original shell script, but is
10*much* more efficient.
11
12The parsing work is handled by the class MetaFile.
13We only pay attention to a subset of the information in the
14"meta" files.  Specifically:
15
'CWD'	to initialize our notion of the current working directory.
17
18'C'	to track chdir(2) on a per process basis
19
20'R'	files read are what we really care about.
21	directories read, provide a clue to resolving
22	subsequent relative paths.  That is if we cannot find
23	them relative to 'cwd', we check relative to the last
24	dir read.
25
26'W'	files opened for write or read-write,
27	for filemon V3 and earlier.
28
29'E'	files executed.
30
31'L'	files linked
32
33'V'	the filemon version, this record is used as a clue
34	that we have reached the interesting bit.
35
36"""
37
38"""
39RCSid:
40	$Id: meta2deps.py,v 1.16 2013/12/20 06:08:52 sjg Exp $
41
42	Copyright (c) 2011-2013, Juniper Networks, Inc.
43	All rights reserved.
44
45	Redistribution and use in source and binary forms, with or without
46	modification, are permitted provided that the following conditions
47	are met:
48	1. Redistributions of source code must retain the above copyright
49	   notice, this list of conditions and the following disclaimer.
50	2. Redistributions in binary form must reproduce the above copyright
51	   notice, this list of conditions and the following disclaimer in the
52	   documentation and/or other materials provided with the distribution.
53
54	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
55	"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
56	LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
57	A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
58	OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
59	SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
60	LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
61	DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
62	THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
63	(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
64	OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
65
66"""
67
68import os, re, sys
69
def getv(dict, key, d=None):
    """Return dict[key] when key is present, otherwise the fallback d.

    The membership test is done first so that the container is never
    indexed with a key it does not hold.
    """
    return dict[key] if key in dict else d
75
def resolve(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Turn path into an absolute path without touching symlinks.

    A trailing '/.' is dropped first.  Absolute paths are returned as is,
    '.' maps to cwd and './x' maps to cwd + '/x'; none of these are
    checked for existence.  Any other relative path is looked for under
    last_dir (unless it equals cwd) and then under cwd; the first
    candidate that exists is returned, otherwise None.
    """
    verbose = debug > 2

    if path.endswith('/.'):
        path = path[:-2]
    if path[:1] == '/':
        return path
    if path == '.':
        return cwd
    if path.startswith('./'):
        return cwd + path[1:]

    # no point probing the same directory twice
    if last_dir == cwd:
        bases = [cwd]
    else:
        bases = [last_dir, cwd]

    for base in bases:
        if not base:
            continue
        candidate = base + '/' + path
        if verbose:
            print("looking for:", candidate, end=' ', file=debug_out)
        if os.path.exists(candidate):
            if verbose:
                print("found:", candidate, file=debug_out)
            return candidate
        if verbose:
            print("nope", file=debug_out)
    return None
105
def abspath(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Return an absolute, and where necessary canonical, form of path.

    The path is first passed through resolve(); if that yields nothing
    the original path is kept.  Because this is called very often,
    os.path.realpath() is only used when the result still contains './'
    past its first character, ends in '/..', or is itself a symlink.
    """
    resolved = resolve(path, cwd, last_dir, debug, debug_out)
    candidate = resolved if resolved else path
    needs_realpath = (candidate.find('./') > 0
                      or candidate.endswith('/..')
                      or os.path.islink(candidate))
    if needs_realpath:
        return os.path.realpath(candidate)
    return candidate
120
121def sort_unique(list, cmp=None, key=None, reverse=False):
122    list.sort(cmp, key, reverse)
123    nl = []
124    le = None
125    for e in list:
126        if e == le:
127            continue
128        nl.append(e)
129    return nl
130
def add_trims(x):
    """Return the four forms of x to trim: '/x/', '/x', 'x/' and 'x'.

    The order matters to callers that apply the trims in sequence: the
    most specific form (with both slashes) comes first.
    """
    return [lead + x + trail
            for lead in ('/', '')
            for trail in ('/', '')]
136
class MetaFile:
    """class to parse meta files generated by bmake."""

    # NOTE: the containers below are class attributes and are mutated in
    # place (append / item assignment), so until reset() rebinds them on
    # an instance they are shared by every MetaFile instance.  main()
    # relies on this: it creates one instance per meta file and reports
    # the accumulated results from the last one.
    conf = None
    dirdep_re = None
    host_target = None
    srctops = []
    objroots = []

    seen = {}
    obj_deps = []
    src_deps = []
    file_deps = []

    def __init__(self, name, conf=None):
        """if name is set we will parse it now.
        conf can have the following keys:

        SRCTOPS	list of tops of the src tree(s).

        CURDIR	the src directory 'bmake' was run from.

        RELDIR	the relative path from SRCTOP to CURDIR

        MACHINE	the machine we built for.
        	set to 'none' if we are not cross-building.
        	More specifically if machine cannot be deduced from objdirs.

        MACHINE_ARCH
        	stored as self.machine_arch.

        TARGET_SPEC
        	Sometimes MACHINE isn't enough.

        HOST_TARGET
        	when we build for the pseudo machine 'host'
        	the object tree uses HOST_TARGET rather than MACHINE.

        OBJROOTS a list of the common prefix for all obj dirs it might
        	end in '/' or '-'.

        DPDEPS	names an optional file to which per file dependencies
        	will be appended.
        	For example if 'some/path/foo.h' is read from SRCTOP
        	then 'DPDEPS_some/path/foo.h +=' "RELDIR" is output.
        	This can allow 'bmake' to learn all the dirs within
        	the tree that depend on 'foo.h'

        debug	desired debug level

        debug_out open file to send debug output to (sys.stderr)

        """

        # avoid a shared mutable default argument
        if conf is None:
            conf = {}

        self.name = name
        self.debug = getv(conf, 'debug', 0)
        self.debug_out = getv(conf, 'debug_out', sys.stderr)

        self.machine = getv(conf, 'MACHINE', '')
        self.machine_arch = getv(conf, 'MACHINE_ARCH', '')
        self.target_spec = getv(conf, 'TARGET_SPEC', '')
        self.curdir = getv(conf, 'CURDIR')
        self.reldir = getv(conf, 'RELDIR')
        self.dpdeps = getv(conf, 'DPDEPS')
        self.line = 0

        if not self.conf:
            # some of the steps below we want to do only once
            # NOTE(review): self.conf is assigned on the instance, so
            # unless a subclass sets a class level conf this block runs
            # for every new instance; the 'not in' checks below keep
            # the shared lists free of duplicates when it does.
            self.conf = conf
            self.host_target = getv(conf, 'HOST_TARGET')
            for srctop in getv(conf, 'SRCTOPS', []):
                if srctop[-1] != '/':
                    srctop += '/'
                if not srctop in self.srctops:
                    self.srctops.append(srctop)
                # also remember the symlink-free spelling
                _srctop = os.path.realpath(srctop)
                if _srctop[-1] != '/':
                    _srctop += '/'
                if not _srctop in self.srctops:
                    self.srctops.append(_srctop)

            trim_list = add_trims(self.machine)
            # HOST_TARGET may be missing, nothing to trim in that case
            if self.machine == 'host' and self.host_target:
                trim_list += add_trims(self.host_target)
            if self.target_spec:
                trim_list += add_trims(self.target_spec)

            for objroot in getv(conf, 'OBJROOTS', []):
                for e in trim_list:
                    if not e:
                        # an empty MACHINE yields an empty trim, which
                        # would "match" everything and wipe out objroot
                        continue
                    if objroot.endswith(e):
                        # this is not what we want - fix it
                        objroot = objroot[0:-len(e)]
                        if e.endswith('/'):
                            objroot += '/'
                if not objroot in self.objroots:
                    self.objroots.append(objroot)
                    # also remember the symlink-free spelling, keeping
                    # a trailing '/' if the original had one
                    _objroot = os.path.realpath(objroot)
                    if objroot[-1] == '/':
                        _objroot += '/'
                    if not _objroot in self.objroots:
                        self.objroots.append(_objroot)

            # we want the longest match
            self.srctops.sort(reverse=True)
            self.objroots.sort(reverse=True)

            if self.debug:
                print("host_target=", self.host_target, file=self.debug_out)
                print("srctops=", self.srctops, file=self.debug_out)
                print("objroots=", self.objroots, file=self.debug_out)

            # splits 'machine/rest/of/path' into machine and the rest
            self.dirdep_re = re.compile(r'([^/]+)/(.+)')

        if self.dpdeps and not self.reldir:
            if self.debug:
                print("need reldir:", end=' ', file=self.debug_out)
            if self.curdir:
                srctop = self.find_top(self.curdir, self.srctops)
                if srctop:
                    self.reldir = self.curdir.replace(srctop,'')
                    if self.debug:
                        print(self.reldir, file=self.debug_out)
            if not self.reldir:
                self.dpdeps = None      # we cannot do it?

        self.cwd = os.getcwd()          # make sure this is initialized

        if name:
            self.try_parse()

    def reset(self):
        """reset state if we are being passed meta files from multiple directories."""
        self.seen = {}
        self.obj_deps = []
        self.src_deps = []
        self.file_deps = []

    def dirdeps(self, sep='\n'):
        """return DIRDEPS

        The entries of obj_deps joined by sep, prefixed with sep
        stripped of surrounding whitespace."""
        return sep.strip() + sep.join(self.obj_deps)

    def src_dirdeps(self, sep='\n'):
        """return SRC_DIRDEPS

        The entries of src_deps joined by sep, prefixed with sep
        stripped of surrounding whitespace."""
        return sep.strip() + sep.join(self.src_deps)

    def file_depends(self, out=None):
        """Append DPDEPS_${file} += ${RELDIR}
        for each file we saw, to the output file.

        Does nothing if reldir is not known.
        With out=None the lines go to stdout."""
        if not self.reldir:
            return None
        for f in sort_unique(self.file_deps):
            print('DPDEPS_%s += %s' % (f, self.reldir), file=out)

    def seenit(self, dir):
        """remember that we have seen dir."""
        self.seen[dir] = 1

    def add(self, list, data, clue=''):
        """add data to list if it isn't already there.

        clue is only used to label the debug output."""
        if data not in list:
            list.append(data)
            if self.debug:
                print("%s: %sAdd: %s" % (self.name, clue, data), file=self.debug_out)

    def find_top(self, path, list):
        """the logical tree may be split across multiple trees

        Return the first entry of list that path starts with, or None."""
        for top in list:
            if path.startswith(top):
                if self.debug > 2:
                    print("found in", top, file=self.debug_out)
                return top
        return None

    def find_obj(self, objroot, dir, path, input):
        """return path within objroot, taking care of .dirdep files

        objroot	the object root that dir was found under.

        dir	the directory holding path.

        path	the file that was accessed.

        input	the raw path from the meta file; it is marked as seen
        	when no .dirdep file provides the answer.

        Returns the dirdep, or None if one cannot be worked out.
        """
        ddep = None
        for ddepf in [path + '.dirdep', dir + '/.dirdep']:
            if not ddep and os.path.exists(ddepf):
                # only the first line matters, and do not leak the handle
                with open(ddepf, 'r') as ddep_file:
                    ddep = ddep_file.readline().strip('# \n')
                if self.debug > 1:
                    print("found %s: %s\n" % (ddepf, ddep), file=self.debug_out)
                # strip a trailing MACHINE (or TARGET_SPEC) along with
                # the separator in front of it.  An empty MACHINE must
                # not match: endswith('') is always true and would chop
                # the last character of ddep.
                if self.machine and ddep.endswith(self.machine):
                    ddep = ddep[0:-(1+len(self.machine))]
                elif self.target_spec and ddep.endswith(self.target_spec):
                    ddep = ddep[0:-(1+len(self.target_spec))]

        if not ddep:
            # no .dirdeps, so remember that we've seen the raw input
            self.seenit(input)
            self.seenit(dir)
            if self.machine == 'none':
                if dir.startswith(objroot):
                    return dir.replace(objroot,'')
                return None
            m = self.dirdep_re.match(dir.replace(objroot,''))
            if m:
                ddep = m.group(2)
                dmachine = m.group(1)
                if dmachine != self.machine:
                    # built for another machine: qualify the dirdep,
                    # unless it is just HOST_TARGET standing in for 'host'
                    if not (self.machine == 'host' and
                            dmachine == self.host_target):
                        if self.debug > 2:
                            print("adding .%s to %s" % (dmachine, ddep), file=self.debug_out)
                        ddep += '.' + dmachine

        return ddep

    def try_parse(self, name=None, file=None):
        """give file and line number causing exception"""
        try:
            self.parse(name, file)
        except:
            # give a useful clue
            print('{}:{}: '.format(self.name, self.line), end=' ', file=sys.stderr)
            raise

    def parse(self, name=None, file=None):
        """A meta file looks like:

        # Meta data file "path"
        CMD "command-line"
        CWD "cwd"
        TARGET "target"
        -- command output --
        -- filemon acquired metadata --
        # buildmon version 3
        V 3
        C "pid" "cwd"
        E "pid" "path"
        F "pid" "child"
        R "pid" "path"
        W "pid" "path"
        X "pid" "status"
        D "pid" "path"
        L "pid" "src" "target"
        M "pid" "old" "new"
        S "pid" "path"
        # Bye bye

        We go to some effort to avoid processing a dependency more than once.
        Of the above record types only C,E,F,L,R and V are looked at;
        W records are currently filtered out as well, because the
        version check that would enable them is disabled.

        name	if set, replaces self.name as the meta file to read.

        file	if set, an already open iterable of lines to parse
        	instead of opening self.name; it is not closed here.
        """

        if name:
            self.name = name
        if file:
            self._parse_records(file)
            return
        f = open(self.name, 'r')
        try:
            self._parse_records(f)
        finally:
            # we opened it, so we close it - even if parsing blew up
            f.close()

    def _parse_records(self, f):
        """process the lines of one meta file, updating the dependency lists."""
        version = 0                     # unknown
        # always start with a sane cwd, a CWD record normally replaces it
        cwd = last_dir = self.cwd
        skip = True
        pid_cwd = {}
        pid_last_dir = {}
        last_pid = 0

        self.line = 0
        if self.curdir:
            self.seenit(self.curdir)    # we ignore this

        interesting = 'CEFLRV'
        for line in f:
            self.line += 1
            # ignore anything we don't care about
            if not line or not line[0] in interesting:
                continue
            if self.debug > 2:
                print("input:", line, end=' ', file=self.debug_out)
            w = line.split()

            if skip:
                # everything before the 'V' record is header or command
                # output, the only thing we want from it is CWD
                if w[0] == 'V':
                    skip = False
                    version = int(w[1])
                    # disabled:
                    # if version < 4:
                    #     # we cannot ignore 'W' records
                    #     # as they may be 'rw'
                    #     interesting += 'W'
                elif w[0] == 'CWD':
                    self.cwd = cwd = last_dir = w[1]
                    self.seenit(cwd)    # ignore this
                    if self.debug:
                        print("%s: CWD=%s" % (self.name, cwd), file=self.debug_out)
                continue

            pid = int(w[1])
            if pid != last_pid:
                # switching process: save the state of the previous one
                # and pick up whatever we know about this one
                if last_pid:
                    pid_cwd[last_pid] = cwd
                    pid_last_dir[last_pid] = last_dir
                cwd = getv(pid_cwd, pid, self.cwd)
                last_dir = getv(pid_last_dir, pid, self.cwd)
                last_pid = pid

            # process operations
            if w[0] == 'F':
                # the child starts out in the cwd of its parent
                npid = int(w[2])
                pid_cwd[npid] = cwd
                pid_last_dir[npid] = cwd
                last_pid = npid
                continue
            elif w[0] == 'C':
                cwd = abspath(w[2], cwd, None, self.debug, self.debug_out)
                if cwd.endswith('/.'):
                    cwd = cwd[0:-2]
                last_dir = cwd
                if self.debug > 1:
                    print("cwd=", cwd, file=self.debug_out)
                continue

            if w[2] in self.seen:
                if self.debug > 2:
                    print("seen:", w[2], file=self.debug_out)
                continue
            # file operations
            if w[0] in 'ML':
                path = w[2].strip("'")
            else:
                path = w[2]
            # we are never interested in .dirdep files as dependencies
            if path.endswith('.dirdep'):
                continue
            # we don't want to resolve the last component if it is
            # a symlink
            path = resolve(path, cwd, last_dir, self.debug, self.debug_out)
            if not path:
                continue
            dir,base = os.path.split(path)
            if dir in self.seen:
                if self.debug > 2:
                    print("seen:", dir, file=self.debug_out)
                continue
            # we can have a path in an objdir which is a link
            # to the src dir, we may need to add dependencies for each
            rdir = dir
            dir = abspath(dir, cwd, last_dir, self.debug, self.debug_out)
            if rdir == dir or rdir.find('./') > 0:
                rdir = None
            # now put path back together
            path = '/'.join([dir,base])
            if self.debug > 1:
                print("raw=%s rdir=%s dir=%s path=%s" % (w[2], rdir, dir, path), file=self.debug_out)
            if w[0] in 'SRWL':
                if w[0] == 'W' and path.endswith('.dirdep'):
                    continue
                if path in [last_dir, cwd, self.cwd, self.curdir]:
                    if self.debug > 1:
                        print("skipping:", path, file=self.debug_out)
                    continue
                if os.path.isdir(path):
                    # a directory is not a dependency, but one that was
                    # read is a clue for resolving later relative paths
                    if w[0] in 'RW':
                        last_dir = path
                    if self.debug > 1:
                        print("ldir=", last_dir, file=self.debug_out)
                    continue

            if w[0] in 'REWML':
                # finally, we get down to it
                if dir == self.cwd or dir == self.curdir:
                    continue
                srctop = self.find_top(path, self.srctops)
                if srctop:
                    if self.dpdeps:
                        self.add(self.file_deps, path.replace(srctop,''), 'file')
                    self.add(self.src_deps, dir.replace(srctop,''), 'src')
                    self.seenit(w[2])
                    self.seenit(dir)
                    if rdir and not rdir.startswith(srctop):
                        dir = rdir      # for below
                        rdir = None
                    else:
                        continue

                objroot = None
                for dir in [dir,rdir]:
                    if not dir:
                        continue
                    objroot = self.find_top(dir, self.objroots)
                    if objroot:
                        break
                if objroot:
                    ddep = self.find_obj(objroot, dir, path, w[2])
                    if ddep:
                        self.add(self.obj_deps, ddep, 'obj')
                else:
                    # don't waste time looking again
                    self.seenit(w[2])
                    self.seenit(dir)
526
527
def main(argv, klass=MetaFile, xopts='', xoptf=None):
    """Simple driver for class MetaFile.

    Usage:
        script [options] [key=value ...] "meta" ...

    Options and key=value pairs contribute to the
    dictionary passed to MetaFile.

    -S "SRCTOP"
        add "SRCTOP" to the "SRCTOPS" list.

    -C "CURDIR"

    -O "OBJROOT"
        add "OBJROOT" to the "OBJROOTS" list.

    -R "RELDIR"

    -m "MACHINE"

    -a "MACHINE_ARCH"

    -T "TARGET_SPEC"

    -H "HOST_TARGET"

    -D "DPDEPS"

    -d	bumps debug level

    -q	do not print the results

    The environment variables MACHINE, MACHINE_ARCH, SB_SRC and
    SB_OBJROOT provide initial values when set and not empty.

    Arguments ending in '.meta' are parsed as meta files, arguments of
    the form '@file' name a file listing meta files (whitespace
    separated); anything else is ignored.

    argv	the command line, argv[0] is skipped.

    klass	the class to instantiate for each meta file.

    xopts	extra getopt option letters to accept.

    xoptf	called as xoptf(option, value, conf) for options not
    	handled here.

    Returns the last klass instance created, or None if no meta file
    was named.  getopt.GetoptError is raised for bad options.
    """
    import getopt

    # import Psyco if we can
    # it can speed things up quite a bit
    have_psyco = 0
    try:
        import psyco
        psyco.full()
        have_psyco = 1
    except Exception:
        # purely an optimization, carry on without it
        pass

    conf = {
        'SRCTOPS': [],
        'OBJROOTS': [],
        }

    # look each variable up on its own, so that one which is missing
    # does not stop us from using the others
    machine = os.environ.get('MACHINE')
    if machine:
        conf['MACHINE'] = machine
    machine_arch = os.environ.get('MACHINE_ARCH')
    if machine_arch:
        conf['MACHINE_ARCH'] = machine_arch
    srctop = os.environ.get('SB_SRC')
    if srctop:
        conf['SRCTOPS'].append(srctop)
    objroot = os.environ.get('SB_OBJROOT')
    if objroot:
        conf['OBJROOTS'].append(objroot)

    debug = 0
    output = True

    opts, args = getopt.getopt(argv[1:], 'a:dS:C:O:R:m:D:H:qT:' + xopts)
    for o, a in opts:
        if o == '-a':
            conf['MACHINE_ARCH'] = a
        elif o == '-d':
            debug += 1
        elif o == '-q':
            output = False
        elif o == '-H':
            conf['HOST_TARGET'] = a
        elif o == '-S':
            if a not in conf['SRCTOPS']:
                conf['SRCTOPS'].append(a)
        elif o == '-C':
            conf['CURDIR'] = a
        elif o == '-O':
            if a not in conf['OBJROOTS']:
                conf['OBJROOTS'].append(a)
        elif o == '-R':
            conf['RELDIR'] = a
        elif o == '-D':
            conf['DPDEPS'] = a
        elif o == '-m':
            conf['MACHINE'] = a
        elif o == '-T':
            conf['TARGET_SPEC'] = a
        elif xoptf:
            xoptf(o, a, conf)

    conf['debug'] = debug

    # get any var=val assignments, they must precede the meta files
    neaten = 0
    for a in args:
        if a.find('=') <= 0:
            break
        # split on the first '=' only, the value may contain more
        k, v = a.split('=', 1)
        if k in ['SRCTOP','OBJROOT','SRCTOPS','OBJROOTS']:
            if k == 'SRCTOP':
                k = 'SRCTOPS'
            elif k == 'OBJROOT':
                k = 'OBJROOTS'
            if v not in conf[k]:
                conf[k].append(v)
        else:
            conf[k] = v
        neaten += 1
    args = args[neaten:]

    debug_out = getv(conf, 'debug_out', sys.stderr)

    if debug:
        print("config:", file=debug_out)
        print("psyco=", have_psyco, file=debug_out)
        for k,v in list(conf.items()):
            print("%s=%s" % (k,v), file=debug_out)

    m = None                            # in case we are given no meta files
    for a in args:
        if a.endswith('.meta'):
            m = klass(a, conf)
        elif a.startswith('@'):
            # there can actually multiple files per line
            with open(a[1:]) as flist:
                for line in flist:
                    for f in line.strip().split():
                        m = klass(f, conf)

    if output and m is not None:
        print(m.dirdeps())

        print(m.src_dirdeps('\nsrc:'))

        dpdeps = getv(conf, 'DPDEPS')
        if dpdeps:
            # file_depends() uses print(), so this must be a text file
            with open(dpdeps, 'w') as dpdeps_out:
                m.file_depends(dpdeps_out)

    return m
671
# script entry point: run the driver, and on any failure announce the
# error before letting the exception propagate
if __name__ == '__main__':
    try:
        main(sys.argv)
    except BaseException as err:
        # yes, this goes to stdout
        print("ERROR: ", err)
        raise
679
680