xref: /freebsd/contrib/bmake/mk/meta2deps.py (revision 5944f899a2519c6321bac3c17cc076418643a088)
1#!/usr/bin/env python
2
3from __future__ import print_function
4
5"""
6This script parses each "meta" file and extracts the
7information needed to deduce build and src dependencies.
8
9It works much the same as the original shell script, but is
10*much* more efficient.
11
12The parsing work is handled by the class MetaFile.
13We only pay attention to a subset of the information in the
14"meta" files.  Specifically:
15
16'CWD'	to initialize our notion.
17
18'C'	to track chdir(2) on a per process basis
19
20'R'	files read are what we really care about.
21	directories read, provide a clue to resolving
22	subsequent relative paths.  That is if we cannot find
23	them relative to 'cwd', we check relative to the last
24	dir read.
25
26'W'	files opened for write or read-write,
27	for filemon V3 and earlier.
28
29'E'	files executed.
30
31'L'	files linked
32
33'V'	the filemon version, this record is used as a clue
34	that we have reached the interesting bit.
35
36"""
37
38"""
39RCSid:
40	$Id: meta2deps.py,v 1.24 2017/02/08 22:17:10 sjg Exp $
41
42	Copyright (c) 2011-2013, Juniper Networks, Inc.
43	All rights reserved.
44
45	Redistribution and use in source and binary forms, with or without
46	modification, are permitted provided that the following conditions
47	are met:
48	1. Redistributions of source code must retain the above copyright
49	   notice, this list of conditions and the following disclaimer.
50	2. Redistributions in binary form must reproduce the above copyright
51	   notice, this list of conditions and the following disclaimer in the
52	   documentation and/or other materials provided with the distribution.
53
54	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
55	"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
56	LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
57	A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
58	OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
59	SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
60	LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
61	DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
62	THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
63	(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
64	OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
65
66"""
67
68import os, re, sys
69
def getv(dict, key, d=None):
    """Return dict[key] when key is present, otherwise the fallback d."""
    return dict[key] if key in dict else d
75
def resolve(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Turn path into an absolute path if we can.

    Absolute paths are returned as is (less any trailing '/.'),
    '.' and './xxx' are taken relative to cwd.
    Anything else is looked for under last_dir and then cwd,
    the first candidate that exists is returned.
    Return None if neither exists.
    """
    verbose = debug > 2

    if path.endswith('/.'):
        path = path[:-2]
    if path[:1] == '/':
        return path
    if path == '.':
        return cwd
    if path.startswith('./'):
        return cwd + path[1:]

    # last_dir gets the first shot, unless it is just cwd again
    if last_dir == cwd:
        bases = [cwd]
    else:
        bases = [last_dir, cwd]

    for base in bases:
        if not base:
            continue
        candidate = base + '/' + path
        if verbose:
            print("looking for:", candidate, end=' ', file=debug_out)
        if os.path.exists(candidate):
            if verbose:
                print("found:", candidate, file=debug_out)
            return candidate
        if verbose:
            print("nope", file=debug_out)
    return None
105
def cleanpath(path):
    """cleanup path without using realpath(3)

    Empty and '.' components are dropped and 'xxx/..' pairs are
    collapsed; this is purely textual, symlinks are not consulted.

    A '..' that would climb above the start of the path used to
    raise IndexError.  Now it is dropped for an absolute path
    (the parent of '/' is '/') and kept for a relative path, since
    we cannot know what it refers to.
    """
    is_abs = path.startswith('/')
    parts = []
    for comp in path.split('/'):
        if not comp or comp == '.':
            continue
        if comp == '..':
            if parts and parts[-1] != '..':
                parts.pop()
            elif not is_abs:
                # nothing left to strip, remember we went up
                parts.append('..')
            continue
        parts.append(comp)

    return ('/' if is_abs else '') + '/'.join(parts)
123
def abspath(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Resolve path via cwd or last_dir, then tidy it up if it needs it.

    We get called a lot, so realpath is avoided and cleanpath()
    is only used when the result looks like it wants cleaning.
    If path cannot be resolved it is used as given.
    """
    resolved = resolve(path, cwd, last_dir, debug, debug_out)
    if resolved:
        path = resolved
    # clean up when there is no '/' at all, a './' (which also
    # matches '../') after the first character, or a trailing '/..'
    needs_cleaning = ('/' not in path
                      or path.find('./') > 0
                      or path.endswith('/..'))
    return cleanpath(path) if needs_cleaning else path
137
138def sort_unique(list, cmp=None, key=None, reverse=False):
139    list.sort(cmp, key, reverse)
140    nl = []
141    le = None
142    for e in list:
143        if e == le:
144            continue
145	le = e
146        nl.append(e)
147    return nl
148
def add_trims(x):
    """Return x wrapped in '/' both sides, in front, behind and bare.

    The order matters to callers that strip the first match:
    the longest decoration comes first.
    """
    decorations = (('/', '/'), ('/', ''), ('', '/'), ('', ''))
    return [before + x + after for before, after in decorations]
154
class MetaFile:
    """class to parse meta files generated by bmake."""

    # NOTE: everything below is a class attribute.
    # The lists and the dict are mutated in place (append, sort,
    # item assignment) through self, so what one instance collects
    # is visible to every other instance until reset() rebinds
    # fresh containers on an instance.  main() relies on this: it
    # creates one instance per meta file and reports from the last.

    # the conf dict; __init__ does its setup of the tops below
    # while this is not yet set
    conf = None
    # compiled regex that splits "machine/rest/of/path"
    dirdep_re = None
    # conf['HOST_TARGET'], used when machine is 'host'
    host_target = None
    # src tree tops, each ending in '/'
    srctops = []
    # obj tree tops, each ending in '/'
    objroots = []
    # path prefixes that parse_path() ignores
    excludes = []
    # raw paths and dirs we do not need to look at again
    seen = {}
    # the dependencies found so far: obj dirs, src dirs, src files
    obj_deps = []
    src_deps = []
    file_deps = []
168
169    def __init__(self, name, conf={}):
170        """if name is set we will parse it now.
171        conf can have the follwing keys:
172
173        SRCTOPS list of tops of the src tree(s).
174
175        CURDIR  the src directory 'bmake' was run from.
176
177        RELDIR  the relative path from SRCTOP to CURDIR
178
179        MACHINE the machine we built for.
180                set to 'none' if we are not cross-building.
181                More specifically if machine cannot be deduced from objdirs.
182
183        TARGET_SPEC
184                Sometimes MACHINE isn't enough.
185
186        HOST_TARGET
187                when we build for the pseudo machine 'host'
188                the object tree uses HOST_TARGET rather than MACHINE.
189
190        OBJROOTS a list of the common prefix for all obj dirs it might
191                end in '/' or '-'.
192
193        DPDEPS  names an optional file to which per file dependencies
194                will be appended.
195                For example if 'some/path/foo.h' is read from SRCTOP
196                then 'DPDEPS_some/path/foo.h +=' "RELDIR" is output.
197                This can allow 'bmake' to learn all the dirs within
198                the tree that depend on 'foo.h'
199
200        EXCLUDES
201                A list of paths to ignore.
202                ccache(1) can otherwise be trouble.
203
204        debug   desired debug level
205
206        debug_out open file to send debug output to (sys.stderr)
207
208        """
209
210        self.name = name
211        self.debug = getv(conf, 'debug', 0)
212        self.debug_out = getv(conf, 'debug_out', sys.stderr)
213
214        self.machine = getv(conf, 'MACHINE', '')
215        self.machine_arch = getv(conf, 'MACHINE_ARCH', '')
216        self.target_spec = getv(conf, 'TARGET_SPEC', '')
217        self.curdir = getv(conf, 'CURDIR')
218        self.reldir = getv(conf, 'RELDIR')
219        self.dpdeps = getv(conf, 'DPDEPS')
220        self.line = 0
221
222        if not self.conf:
223            # some of the steps below we want to do only once
224            self.conf = conf
225            self.host_target = getv(conf, 'HOST_TARGET')
226            for srctop in getv(conf, 'SRCTOPS', []):
227                if srctop[-1] != '/':
228                    srctop += '/'
229                if not srctop in self.srctops:
230                    self.srctops.append(srctop)
231                _srctop = os.path.realpath(srctop)
232                if _srctop[-1] != '/':
233                    _srctop += '/'
234                if not _srctop in self.srctops:
235                    self.srctops.append(_srctop)
236
237            trim_list = add_trims(self.machine)
238            if self.machine == 'host':
239                trim_list += add_trims(self.host_target)
240            if self.target_spec:
241                trim_list += add_trims(self.target_spec)
242
243            for objroot in getv(conf, 'OBJROOTS', []):
244                for e in trim_list:
245                    if objroot.endswith(e):
246                        # this is not what we want - fix it
247                        objroot = objroot[0:-len(e)]
248
249                if objroot[-1] != '/':
250                    objroot += '/'
251                if not objroot in self.objroots:
252                    self.objroots.append(objroot)
253                    _objroot = os.path.realpath(objroot)
254                    if objroot[-1] == '/':
255                        _objroot += '/'
256                    if not _objroot in self.objroots:
257                        self.objroots.append(_objroot)
258
259            # we want the longest match
260            self.srctops.sort(reverse=True)
261            self.objroots.sort(reverse=True)
262
263            self.excludes = getv(conf, 'EXCLUDES', [])
264
265            if self.debug:
266                print("host_target=", self.host_target, file=self.debug_out)
267                print("srctops=", self.srctops, file=self.debug_out)
268                print("objroots=", self.objroots, file=self.debug_out)
269                print("excludes=", self.excludes, file=self.debug_out)
270
271            self.dirdep_re = re.compile(r'([^/]+)/(.+)')
272
273        if self.dpdeps and not self.reldir:
274            if self.debug:
275                print("need reldir:", end=' ', file=self.debug_out)
276            if self.curdir:
277                srctop = self.find_top(self.curdir, self.srctops)
278                if srctop:
279                    self.reldir = self.curdir.replace(srctop,'')
280                    if self.debug:
281                        print(self.reldir, file=self.debug_out)
282            if not self.reldir:
283                self.dpdeps = None      # we cannot do it?
284
285        self.cwd = os.getcwd()          # make sure this is initialized
286        self.last_dir = self.cwd
287
288        if name:
289            self.try_parse()
290
291    def reset(self):
292        """reset state if we are being passed meta files from multiple directories."""
293        self.seen = {}
294        self.obj_deps = []
295        self.src_deps = []
296        self.file_deps = []
297
298    def dirdeps(self, sep='\n'):
299        """return DIRDEPS"""
300        return sep.strip() + sep.join(self.obj_deps)
301
302    def src_dirdeps(self, sep='\n'):
303        """return SRC_DIRDEPS"""
304        return sep.strip() + sep.join(self.src_deps)
305
306    def file_depends(self, out=None):
307        """Append DPDEPS_${file} += ${RELDIR}
308        for each file we saw, to the output file."""
309        if not self.reldir:
310            return None
311        for f in sort_unique(self.file_deps):
312            print('DPDEPS_%s += %s' % (f, self.reldir), file=out)
313        # these entries provide for reverse DIRDEPS lookup
314        for f in self.obj_deps:
315            print('DEPDIRS_%s += %s' % (f, self.reldir), file=out)
316
317    def seenit(self, dir):
318        """rememer that we have seen dir."""
319        self.seen[dir] = 1
320
321    def add(self, list, data, clue=''):
322        """add data to list if it isn't already there."""
323        if data not in list:
324            list.append(data)
325            if self.debug:
326                print("%s: %sAdd: %s" % (self.name, clue, data), file=self.debug_out)
327
328    def find_top(self, path, list):
329        """the logical tree may be split across multiple trees"""
330        for top in list:
331            if path.startswith(top):
332                if self.debug > 2:
333                    print("found in", top, file=self.debug_out)
334                return top
335        return None
336
337    def find_obj(self, objroot, dir, path, input):
338        """return path within objroot, taking care of .dirdep files"""
339        ddep = None
340        for ddepf in [path + '.dirdep', dir + '/.dirdep']:
341            if not ddep and os.path.exists(ddepf):
342                ddep = open(ddepf, 'r').readline().strip('# \n')
343                if self.debug > 1:
344                    print("found %s: %s\n" % (ddepf, ddep), file=self.debug_out)
345                if ddep.endswith(self.machine):
346                    ddep = ddep[0:-(1+len(self.machine))]
347                elif self.target_spec and ddep.endswith(self.target_spec):
348                    ddep = ddep[0:-(1+len(self.target_spec))]
349
350        if not ddep:
351            # no .dirdeps, so remember that we've seen the raw input
352            self.seenit(input)
353            self.seenit(dir)
354            if self.machine == 'none':
355                if dir.startswith(objroot):
356                    return dir.replace(objroot,'')
357                return None
358            m = self.dirdep_re.match(dir.replace(objroot,''))
359            if m:
360                ddep = m.group(2)
361                dmachine = m.group(1)
362                if dmachine != self.machine:
363                    if not (self.machine == 'host' and
364                            dmachine == self.host_target):
365                        if self.debug > 2:
366                            print("adding .%s to %s" % (dmachine, ddep), file=self.debug_out)
367                        ddep += '.' + dmachine
368
369        return ddep
370
371    def try_parse(self, name=None, file=None):
372        """give file and line number causing exception"""
373        try:
374            self.parse(name, file)
375        except:
376            # give a useful clue
377            print('{}:{}: '.format(self.name, self.line), end=' ', file=sys.stderr)
378            raise
379
380    def parse(self, name=None, file=None):
381        """A meta file looks like:
382
383        # Meta data file "path"
384        CMD "command-line"
385        CWD "cwd"
386        TARGET "target"
387        -- command output --
388        -- filemon acquired metadata --
389        # buildmon version 3
390        V 3
391        C "pid" "cwd"
392        E "pid" "path"
393        F "pid" "child"
394        R "pid" "path"
395        W "pid" "path"
396        X "pid" "status"
397        D "pid" "path"
398        L "pid" "src" "target"
399        M "pid" "old" "new"
400        S "pid" "path"
401        # Bye bye
402
403        We go to some effort to avoid processing a dependency more than once.
404        Of the above record types only C,E,F,L,R,V and W are of interest.
405        """
406
407        version = 0                     # unknown
408        if name:
409            self.name = name;
410        if file:
411            f = file
412            cwd = self.last_dir = self.cwd
413        else:
414            f = open(self.name, 'r')
415        skip = True
416        pid_cwd = {}
417        pid_last_dir = {}
418        last_pid = 0
419
420        self.line = 0
421        if self.curdir:
422            self.seenit(self.curdir)    # we ignore this
423
424        interesting = 'CEFLRV'
425        for line in f:
426            self.line += 1
427            # ignore anything we don't care about
428            if not line[0] in interesting:
429                continue
430            if self.debug > 2:
431                print("input:", line, end=' ', file=self.debug_out)
432            w = line.split()
433
434            if skip:
435                if w[0] == 'V':
436                    skip = False
437                    version = int(w[1])
438                    """
439                    if version < 4:
440                        # we cannot ignore 'W' records
441                        # as they may be 'rw'
442                        interesting += 'W'
443                    """
444                elif w[0] == 'CWD':
445                    self.cwd = cwd = self.last_dir = w[1]
446                    self.seenit(cwd)    # ignore this
447                    if self.debug:
448                        print("%s: CWD=%s" % (self.name, cwd), file=self.debug_out)
449                continue
450
451            pid = int(w[1])
452            if pid != last_pid:
453                if last_pid:
454                    pid_last_dir[last_pid] = self.last_dir
455                cwd = getv(pid_cwd, pid, self.cwd)
456                self.last_dir = getv(pid_last_dir, pid, self.cwd)
457                last_pid = pid
458
459            # process operations
460            if w[0] == 'F':
461                npid = int(w[2])
462                pid_cwd[npid] = cwd
463                pid_last_dir[npid] = cwd
464                last_pid = npid
465                continue
466            elif w[0] == 'C':
467                cwd = abspath(w[2], cwd, None, self.debug, self.debug_out)
468                if cwd.endswith('/.'):
469                    cwd = cwd[0:-2]
470                self.last_dir = pid_last_dir[pid] = cwd
471                pid_cwd[pid] = cwd
472                if self.debug > 1:
473                    print("cwd=", cwd, file=self.debug_out)
474                continue
475
476            if w[2] in self.seen:
477                if self.debug > 2:
478                    print("seen:", w[2], file=self.debug_out)
479                continue
480            # file operations
481            if w[0] in 'ML':
482                # these are special, tread src as read and
483                # target as write
484                self.parse_path(w[1].strip("'"), cwd, 'R', w)
485                self.parse_path(w[2].strip("'"), cwd, 'W', w)
486                continue
487            elif w[0] in 'ERWS':
488                path = w[2]
489                self.parse_path(path, cwd, w[0], w)
490
491        if not file:
492            f.close()
493
494    def parse_path(self, path, cwd, op=None, w=[]):
495        """look at a path for the op specified"""
496
497        if not op:
498            op = w[0]
499
500        # we are never interested in .dirdep files as dependencies
501        if path.endswith('.dirdep'):
502            return
503        for p in self.excludes:
504            if p and path.startswith(p):
505                if self.debug > 2:
506                    print("exclude:", p, path, file=self.debug_out)
507                return
508        # we don't want to resolve the last component if it is
509        # a symlink
510        path = resolve(path, cwd, self.last_dir, self.debug, self.debug_out)
511        if not path:
512            return
513        dir,base = os.path.split(path)
514        if dir in self.seen:
515            if self.debug > 2:
516                print("seen:", dir, file=self.debug_out)
517            return
518        # we can have a path in an objdir which is a link
519        # to the src dir, we may need to add dependencies for each
520        rdir = dir
521        dir = abspath(dir, cwd, self.last_dir, self.debug, self.debug_out)
522        if rdir == dir or rdir.find('./') > 0:
523            rdir = None
524        if os.path.islink(dir):
525            rdir = os.path.realpath(dir)
526        # now put path back together
527        path = '/'.join([dir,base])
528        if self.debug > 1:
529            print("raw=%s rdir=%s dir=%s path=%s" % (w[2], rdir, dir, path), file=self.debug_out)
530        if op in 'RWS':
531            if path in [self.last_dir, cwd, self.cwd, self.curdir]:
532                if self.debug > 1:
533                    print("skipping:", path, file=self.debug_out)
534                return
535            if os.path.isdir(path):
536                if op in 'RW':
537                    self.last_dir = path;
538                if self.debug > 1:
539                    print("ldir=", self.last_dir, file=self.debug_out)
540                return
541
542        if op in 'ERW':
543            # finally, we get down to it
544            if dir == self.cwd or dir == self.curdir:
545                return
546            srctop = self.find_top(path, self.srctops)
547            if srctop:
548                if self.dpdeps:
549                    self.add(self.file_deps, path.replace(srctop,''), 'file')
550                self.add(self.src_deps, dir.replace(srctop,''), 'src')
551                self.seenit(w[2])
552                self.seenit(dir)
553                if rdir and not rdir.startswith(srctop):
554                    dir = rdir      # for below
555                    rdir = None
556                else:
557                    return
558
559            objroot = None
560            for dir in [dir,rdir]:
561                if not dir:
562                    continue
563                objroot = self.find_top(dir, self.objroots)
564                if objroot:
565                    break
566            if objroot:
567                ddep = self.find_obj(objroot, dir, path, w[2])
568                if ddep:
569                    self.add(self.obj_deps, ddep, 'obj')
570                    if self.dpdeps and objroot.endswith('/stage/'):
571                        sp = '/'.join(path.replace(objroot,'').split('/')[1:])
572                        self.add(self.file_deps, sp, 'file')
573            else:
574                # don't waste time looking again
575                self.seenit(w[2])
576                self.seenit(dir)
577
578
def main(argv, klass=MetaFile, xopts='', xoptf=None):
    """Simple driver for class MetaFile.

    Usage:
        script [options] [key=value ...] "meta" ...

    Options and key=value pairs contribute to the
    dictionary passed to MetaFile.

    -S "SRCTOP"
                add "SRCTOP" to the "SRCTOPS" list.

    -C "CURDIR"

    -O "OBJROOT"
                add "OBJROOT" to the "OBJROOTS" list.

    -R "RELDIR"

    -m "MACHINE"

    -a "MACHINE_ARCH"

    -T "TARGET_SPEC"

    -H "HOST_TARGET"

    -D "DPDEPS"

    -X "EXCLUDE"
                add "EXCLUDE" to the "EXCLUDES" list.

    -q  do not output anything

    -d  bumps debug level

    A "meta" argument ending in '.meta' is parsed, one starting
    with '@' names a file listing the meta files to parse.
    Those that do not exist are silently skipped.

    klass is the class to instantiate for each meta file.
    xopts are additional getopt option letters, any option not
    handled here is passed to xoptf(option, arg, conf) if set.

    MACHINE, MACHINE_ARCH, SB_SRC and SB_OBJROOT from the
    environment provide the initial values.

    Return the last klass instance created, or None.
    """
    import getopt

    # import Psyco if we can
    # it can speed things up quite a bit
    have_psyco = 0
    try:
        import psyco
        psyco.full()
        have_psyco = 1
    except Exception:
        pass

    conf = {
        'SRCTOPS': [],
        'OBJROOTS': [],
        'EXCLUDES': [],
        }

    # look at each variable on its own, so that one which is
    # missing does not cause the rest to be ignored
    for name in ['MACHINE', 'MACHINE_ARCH']:
        value = os.environ.get(name)
        if value:
            conf[name] = value
    for name, key in [('SB_SRC', 'SRCTOPS'), ('SB_OBJROOT', 'OBJROOTS')]:
        value = os.environ.get(name)
        if value:
            conf[key].append(value)

    debug = 0
    output = True

    opts, args = getopt.getopt(argv[1:], 'a:dS:C:O:R:m:D:H:qT:X:' + xopts)
    for o, a in opts:
        if o == '-a':
            conf['MACHINE_ARCH'] = a
        elif o == '-d':
            debug += 1
        elif o == '-q':
            output = False
        elif o == '-H':
            conf['HOST_TARGET'] = a
        elif o == '-S':
            if a not in conf['SRCTOPS']:
                conf['SRCTOPS'].append(a)
        elif o == '-C':
            conf['CURDIR'] = a
        elif o == '-O':
            if a not in conf['OBJROOTS']:
                conf['OBJROOTS'].append(a)
        elif o == '-R':
            conf['RELDIR'] = a
        elif o == '-D':
            conf['DPDEPS'] = a
        elif o == '-m':
            conf['MACHINE'] = a
        elif o == '-T':
            conf['TARGET_SPEC'] = a
        elif o == '-X':
            if a not in conf['EXCLUDES']:
                conf['EXCLUDES'].append(a)
        elif xoptf:
            xoptf(o, a, conf)

    conf['debug'] = debug

    # get any var=val assignments, they must come first
    nassign = 0
    for a in args:
        if a.find('=') <= 0:
            break
        # only the first '=' separates, the value may contain more
        k,v = a.split('=', 1)
        if k in ['SRCTOP','OBJROOT','SRCTOPS','OBJROOTS']:
            if k == 'SRCTOP':
                k = 'SRCTOPS'
            elif k == 'OBJROOT':
                k = 'OBJROOTS'
            if v not in conf[k]:
                conf[k].append(v)
        else:
            conf[k] = v
        nassign += 1

    args = args[nassign:]

    debug_out = getv(conf, 'debug_out', sys.stderr)

    if debug:
        print("config:", file=debug_out)
        print("psyco=", have_psyco, file=debug_out)
        for k,v in list(conf.items()):
            print("%s=%s" % (k,v), file=debug_out)

    m = None
    for a in args:
        if a.endswith('.meta'):
            if not os.path.exists(a):
                continue
            m = klass(a, conf)
        elif a.startswith('@'):
            # there can actually multiple files per line
            with open(a[1:]) as listing:
                for line in listing:
                    for f in line.strip().split():
                        if not os.path.exists(f):
                            continue
                        m = klass(f, conf)

    if output and m:
        print(m.dirdeps())

        print(m.src_dirdeps('\nsrc:'))

        dpdeps = getv(conf, 'DPDEPS')
        if dpdeps:
            # text mode: print() cannot write str to a file
            # opened 'wb' with Python 3
            with open(dpdeps, 'w') as out:
                m.file_depends(out)

    return m
731
if __name__ == '__main__':
    try:
        main(sys.argv)
    except:
        # report the reason where the caller captures our output
        # (yes, this goes to stdout) and then let it propagate
        reason = sys.exc_info()[1]
        print("ERROR: ", reason)
        raise
739
740