xref: /freebsd/contrib/bmake/mk/meta2deps.py (revision 23f6875a43f7ce365f2d52cf857da010c47fb03b)
1#!/usr/bin/env python
2
3from __future__ import print_function
4
5"""
6This script parses each "meta" file and extracts the
7information needed to deduce build and src dependencies.
8
9It works much the same as the original shell script, but is
10*much* more efficient.
11
12The parsing work is handled by the class MetaFile.
13We only pay attention to a subset of the information in the
14"meta" files.  Specifically:
15
16'CWD'	to initialize our notion.
17
18'C'	to track chdir(2) on a per process basis
19
20'R'	files read are what we really care about.
21	directories read, provide a clue to resolving
22	subsequent relative paths.  That is if we cannot find
23	them relative to 'cwd', we check relative to the last
24	dir read.
25
26'W'	files opened for write or read-write,
27	for filemon V3 and earlier.
28
29'E'	files executed.
30
31'L'	files linked
32
33'V'	the filemon version, this record is used as a clue
34	that we have reached the interesting bit.
35
36"""
37
38"""
39RCSid:
40	$Id: meta2deps.py,v 1.22 2016/12/12 19:07:42 sjg Exp $
41
42	Copyright (c) 2011-2013, Juniper Networks, Inc.
43	All rights reserved.
44
45	Redistribution and use in source and binary forms, with or without
46	modification, are permitted provided that the following conditions
47	are met:
48	1. Redistributions of source code must retain the above copyright
49	   notice, this list of conditions and the following disclaimer.
50	2. Redistributions in binary form must reproduce the above copyright
51	   notice, this list of conditions and the following disclaimer in the
52	   documentation and/or other materials provided with the distribution.
53
54	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
55	"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
56	LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
57	A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
58	OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
59	SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
60	LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
61	DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
62	THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
63	(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
64	OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
65
66"""
67
68import os, re, sys
69
def getv(dict, key, d=None):
    """Return dict[key] when key is present, otherwise the fallback d.

    Only a membership test and a plain subscript are used, so any
    container supporting `in` and `[]` is acceptable.
    """
    return dict[key] if key in dict else d
75
def resolve(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Turn path into an absolute path, or return None when that fails.

    A trailing '/.' is dropped first.  Absolute paths are returned
    untouched, '.' maps to cwd and './x' is anchored at cwd; none of
    these touch the file system.  Any other relative path is tried
    below last_dir and then below cwd, and the first candidate that
    exists is returned.  With debug > 2 each attempt is reported on
    debug_out.
    """
    if path.endswith('/.'):
        path = path[:-2]
    if path[:1] == '/':
        return path
    if path == '.':
        return cwd
    if path.startswith('./'):
        return cwd + path[1:]

    verbose = debug > 2
    # no point probing the same directory twice
    bases = [cwd] if last_dir == cwd else [last_dir, cwd]
    for base in bases:
        if not base:
            continue
        candidate = base + '/' + path
        if verbose:
            print("looking for:", candidate, end=' ', file=debug_out)
        if os.path.exists(candidate):
            if verbose:
                print("found:", candidate, file=debug_out)
            return candidate
        if verbose:
            print("nope", file=debug_out)
    return None
105
def abspath(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Return an absolute path for path, using cwd or last_dir as needed.

    This is called very often, so os.path.realpath() is reserved for
    the cases that need it: resolve() could not make the path
    absolute, the path still contains './' or ends in '/..', or the
    path itself is a symlink.
    """
    resolved = resolve(path, cwd, last_dir, debug, debug_out)
    if resolved:
        path = resolved
    needs_realpath = ('/' not in path
                      or path.find('./') > 0
                      or path.endswith('/..')
                      or os.path.islink(path))
    if needs_realpath:
        return os.path.realpath(path)
    return path
121
122def sort_unique(list, cmp=None, key=None, reverse=False):
123    list.sort(cmp, key, reverse)
124    nl = []
125    le = None
126    for e in list:
127        if e == le:
128            continue
129        nl.append(e)
130    return nl
131
def add_trims(x):
    """Return the four spellings of x used as trim suffixes.

    In order: '/x/', '/x', 'x/' and 'x'; the longest form comes first.
    """
    wrappers = (('/', '/'), ('/', ''), ('', '/'), ('', ''))
    return [lead + x + trail for lead, trail in wrappers]
137
class MetaFile:
    """Parse "meta" files generated by bmake and collect dependencies.

    Parsing fills in three lists:

    obj_deps   entries derived from paths below one of the OBJROOTS
    src_deps   directories below one of the SRCTOPS
    file_deps  individual files, only collected when DPDEPS is set

    The mutable attributes below live on the class, so all instances
    share (and append to) the same objects until reset() rebinds them
    on an instance.
    """

    conf = None
    dirdep_re = None
    host_target = None
    srctops = []
    objroots = []
    excludes = []
    seen = {}
    obj_deps = []
    src_deps = []
    file_deps = []

    def __init__(self, name, conf=None):
        """if name is set we will parse it now.
        conf can have the following keys:

        SRCTOPS list of tops of the src tree(s).

        CURDIR  the src directory 'bmake' was run from.

        RELDIR  the relative path from SRCTOP to CURDIR

        MACHINE the machine we built for.
                set to 'none' if we are not cross-building.
                More specifically if machine cannot be deduced from objdirs.

        MACHINE_ARCH
                stored as self.machine_arch.

        TARGET_SPEC
                Sometimes MACHINE isn't enough.

        HOST_TARGET
                when we build for the pseudo machine 'host'
                the object tree uses HOST_TARGET rather than MACHINE.

        OBJROOTS a list of the common prefix for all obj dirs it might
                end in '/' or '-'.

        DPDEPS  names an optional file to which per file dependencies
                will be written.
                For example if 'some/path/foo.h' is read from SRCTOP
                then 'DPDEPS_some/path/foo.h +=' "RELDIR" is output.
                This can allow 'bmake' to learn all the dirs within
                the tree that depend on 'foo.h'

        EXCLUDES
                A list of paths to ignore.
                ccache(1) can otherwise be trouble.

        debug   desired debug level

        debug_out open file to send debug output to (sys.stderr)

        """

        if conf is None:
            # a fresh dict per call rather than one shared default
            conf = {}

        self.name = name
        self.debug = getv(conf, 'debug', 0)
        self.debug_out = getv(conf, 'debug_out', sys.stderr)

        self.machine = getv(conf, 'MACHINE', '')
        self.machine_arch = getv(conf, 'MACHINE_ARCH', '')
        self.target_spec = getv(conf, 'TARGET_SPEC', '')
        self.curdir = getv(conf, 'CURDIR')
        self.reldir = getv(conf, 'RELDIR')
        self.dpdeps = getv(conf, 'DPDEPS')
        self.line = 0

        if not self.conf:
            # some of the steps below we want to do only once
            self.conf = conf
            self.host_target = getv(conf, 'HOST_TARGET')
            for srctop in getv(conf, 'SRCTOPS', []):
                if not srctop:
                    # nothing to index, and '' would become '/'
                    continue
                if srctop[-1] != '/':
                    srctop += '/'
                if not srctop in self.srctops:
                    self.srctops.append(srctop)
                # also remember the symlink-free spelling
                _srctop = os.path.realpath(srctop)
                if _srctop[-1] != '/':
                    _srctop += '/'
                if not _srctop in self.srctops:
                    self.srctops.append(_srctop)

            trim_list = add_trims(self.machine)
            # HOST_TARGET is optional, only use it when we have one
            if self.machine == 'host' and self.host_target:
                trim_list += add_trims(self.host_target)
            if self.target_spec:
                trim_list += add_trims(self.target_spec)

            for objroot in getv(conf, 'OBJROOTS', []):
                for e in trim_list:
                    # An empty suffix matches every string and
                    # objroot[0:-0] would wipe out the whole value,
                    # so only real suffixes are trimmed.
                    if e and objroot.endswith(e):
                        # this is not what we want - fix it
                        objroot = objroot[0:-len(e)]

                if not objroot:
                    # nothing left after trimming
                    continue
                if objroot[-1] != '/':
                    objroot += '/'
                if not objroot in self.objroots:
                    self.objroots.append(objroot)
                    _objroot = os.path.realpath(objroot)
                    if _objroot[-1] != '/':
                        _objroot += '/'
                    if not _objroot in self.objroots:
                        self.objroots.append(_objroot)

            # we want the longest match
            self.srctops.sort(reverse=True)
            self.objroots.sort(reverse=True)

            self.excludes = getv(conf, 'EXCLUDES', [])

            if self.debug:
                print("host_target=", self.host_target, file=self.debug_out)
                print("srctops=", self.srctops, file=self.debug_out)
                print("objroots=", self.objroots, file=self.debug_out)
                print("excludes=", self.excludes, file=self.debug_out)

            # splits 'machine/rest/of/path' into its two parts
            self.dirdep_re = re.compile(r'([^/]+)/(.+)')

        if self.dpdeps and not self.reldir:
            if self.debug:
                print("need reldir:", end=' ', file=self.debug_out)
            if self.curdir:
                srctop = self.find_top(self.curdir, self.srctops)
                if srctop:
                    self.reldir = self.curdir.replace(srctop,'')
                    if self.debug:
                        print(self.reldir, file=self.debug_out)
            if not self.reldir:
                self.dpdeps = None      # we cannot do it?

        self.cwd = os.getcwd()          # make sure this is initialized
        self.last_dir = self.cwd

        if name:
            self.try_parse()

    def reset(self):
        """reset state if we are being passed meta files from multiple directories."""
        self.seen = {}
        self.obj_deps = []
        self.src_deps = []
        self.file_deps = []

    def dirdeps(self, sep='\n'):
        """return DIRDEPS: each entry of obj_deps joined by sep,
        prefixed with sep stripped of surrounding white-space."""
        return sep.strip() + sep.join(self.obj_deps)

    def src_dirdeps(self, sep='\n'):
        """return SRC_DIRDEPS: each entry of src_deps joined by sep,
        prefixed with sep stripped of surrounding white-space."""
        return sep.strip() + sep.join(self.src_deps)

    def file_depends(self, out=None):
        """Write DPDEPS_${file} += ${RELDIR}
        for each file we saw, to the output file.
        Nothing is written when RELDIR is unknown."""
        if not self.reldir:
            return None
        for f in sort_unique(self.file_deps):
            print('DPDEPS_%s += %s' % (f, self.reldir), file=out)
        # these entries provide for reverse DIRDEPS lookup
        for f in self.obj_deps:
            print('DEPDIRS_%s += %s' % (f, self.reldir), file=out)

    def seenit(self, dir):
        """remember that we have seen dir."""
        self.seen[dir] = 1

    def add(self, list, data, clue=''):
        """add data to list if it isn't already there.
        clue only labels the debug message."""
        if data not in list:
            list.append(data)
            if self.debug:
                print("%s: %sAdd: %s" % (self.name, clue, data), file=self.debug_out)

    def find_top(self, path, list):
        """the logical tree may be split across multiple trees;
        return the first entry of list that path starts with, or None."""
        for top in list:
            if path.startswith(top):
                if self.debug > 2:
                    print("found in", top, file=self.debug_out)
                return top
        return None

    def find_obj(self, objroot, dir, path, input):
        """return path within objroot, taking care of .dirdep files

        objroot the entry of objroots that dir was found under.
        dir     the directory holding path.
        path    the file that was accessed.
        input   the raw path from the meta file record.

        Returns the DIRDEPS entry, or None when none can be derived.
        """
        ddep = None
        for ddepf in [path + '.dirdep', dir + '/.dirdep']:
            if not ddep and os.path.exists(ddepf):
                with open(ddepf, 'r') as fp:
                    ddep = fp.readline().strip('# \n')
                if self.debug > 1:
                    print("found %s: %s\n" % (ddepf, ddep), file=self.debug_out)
                # Drop the machine (or target spec) and the character
                # separating it from the rest.  An empty name must not
                # match, or the last character would be chopped off.
                if self.machine and ddep.endswith(self.machine):
                    ddep = ddep[0:-(1+len(self.machine))]
                elif self.target_spec and ddep.endswith(self.target_spec):
                    ddep = ddep[0:-(1+len(self.target_spec))]

        if not ddep:
            # no .dirdeps, so remember that we've seen the raw input
            self.seenit(input)
            self.seenit(dir)
            if self.machine == 'none':
                if dir.startswith(objroot):
                    return dir.replace(objroot,'')
                return None
            m = self.dirdep_re.match(dir.replace(objroot,''))
            if m:
                ddep = m.group(2)
                dmachine = m.group(1)
                if dmachine != self.machine:
                    if not (self.machine == 'host' and
                            dmachine == self.host_target):
                        # built for another machine, qualify the entry
                        if self.debug > 2:
                            print("adding .%s to %s" % (dmachine, ddep), file=self.debug_out)
                        ddep += '.' + dmachine

        return ddep

    def try_parse(self, name=None, file=None):
        """parse, and on any exception report the meta file name and
        line number on stderr before re-raising it."""
        try:
            self.parse(name, file)
        except:
            # give a useful clue; the exception is always re-raised
            print('{}:{}: '.format(self.name, self.line), end=' ', file=sys.stderr)
            raise

    def parse(self, name=None, file=None):
        """A meta file looks like:

        # Meta data file "path"
        CMD "command-line"
        CWD "cwd"
        TARGET "target"
        -- command output --
        -- filemon acquired metadata --
        # buildmon version 3
        V 3
        C "pid" "cwd"
        E "pid" "path"
        F "pid" "child"
        R "pid" "path"
        W "pid" "path"
        X "pid" "status"
        D "pid" "path"
        L "pid" "src" "target"
        M "pid" "old" "new"
        S "pid" "path"
        # Bye bye

        We go to some effort to avoid processing a dependency more than once.
        Only lines starting with one of C, E, F, L, R or V are looked at
        (which includes the CWD line); everything else is skipped.

        name    if set, replaces self.name as the file to read.
        file    if set, an already open file to read instead of
                opening self.name; it is left open for the caller.
        """

        version = 0                     # unknown
        if name:
            self.name = name
        # start from a defined cwd even if no CWD record is seen
        cwd = self.cwd
        if file:
            f = file
            self.last_dir = self.cwd
        else:
            f = open(self.name, 'r')
        skip = True
        pid_cwd = {}
        pid_last_dir = {}
        last_pid = 0

        self.line = 0
        if self.curdir:
            self.seenit(self.curdir)    # we ignore this

        # 'W' is not selected; the branches below that mention
        # 'M', 'S' and 'W' only matter if this string is extended.
        interesting = 'CEFLRV'
        try:
            for line in f:
                self.line += 1
                # ignore anything we don't care about
                if not line[0] in interesting:
                    continue
                if self.debug > 2:
                    print("input:", line, end=' ', file=self.debug_out)
                w = line.split()

                if skip:
                    # the 'V' record marks the start of the filemon data
                    if w[0] == 'V':
                        skip = False
                        version = int(w[1])
                    elif w[0] == 'CWD':
                        self.cwd = cwd = self.last_dir = w[1]
                        self.seenit(cwd)    # ignore this
                        if self.debug:
                            print("%s: CWD=%s" % (self.name, cwd), file=self.debug_out)
                    continue

                pid = int(w[1])
                if pid != last_pid:
                    # switch to the state tracked for this process
                    if last_pid:
                        pid_last_dir[last_pid] = self.last_dir
                    cwd = pid_cwd.get(pid, self.cwd)
                    self.last_dir = pid_last_dir.get(pid, self.cwd)
                    last_pid = pid

                # process operations
                if w[0] == 'F':
                    # the child starts out where its parent is
                    npid = int(w[2])
                    pid_cwd[npid] = cwd
                    pid_last_dir[npid] = cwd
                    last_pid = npid
                    continue
                elif w[0] == 'C':
                    cwd = abspath(w[2], cwd, None, self.debug, self.debug_out)
                    if cwd.endswith('/.'):
                        cwd = cwd[0:-2]
                    self.last_dir = pid_last_dir[pid] = cwd
                    pid_cwd[pid] = cwd
                    if self.debug > 1:
                        print("cwd=", cwd, file=self.debug_out)
                    continue

                if w[2] in self.seen:
                    if self.debug > 2:
                        print("seen:", w[2], file=self.debug_out)
                    continue
                # file operations
                if w[0] in 'ML':
                    # these are special, treat src as read and
                    # target as write.  w[1] is the pid, so the two
                    # paths are w[2] and w[3].
                    self.parse_path(w[2].strip("'"), cwd, 'R', w)
                    self.parse_path(w[3].strip("'"), cwd, 'W', w)
                    continue
                elif w[0] in 'ERWS':
                    path = w[2]
                    self.parse_path(path, cwd, w[0], w)
        finally:
            # only close what we opened ourselves
            if not file:
                f.close()

    def parse_path(self, path, cwd, op=None, w=[]):
        """look at a path for the op specified

        path    the path from the record.
        cwd     current directory of the process that used path.
        op      the operation; defaults to w[0].
        w       the fields of the record being processed.
        """

        if not op:
            op = w[0]

        # the raw input remembered in self.seen is the record's third
        # field; fall back to path when no record was supplied
        if len(w) > 2:
            raw = w[2]
        else:
            raw = path

        # we are never interested in .dirdep files as dependencies
        if path.endswith('.dirdep'):
            return
        for p in self.excludes:
            if p and path.startswith(p):
                if self.debug > 2:
                    print("exclude:", p, path, file=self.debug_out)
                return
        # we don't want to resolve the last component if it is
        # a symlink
        path = resolve(path, cwd, self.last_dir, self.debug, self.debug_out)
        if not path:
            return
        dir,base = os.path.split(path)
        if dir in self.seen:
            if self.debug > 2:
                print("seen:", dir, file=self.debug_out)
            return
        # we can have a path in an objdir which is a link
        # to the src dir, we may need to add dependencies for each
        rdir = dir
        dir = abspath(dir, cwd, self.last_dir, self.debug, self.debug_out)
        if rdir == dir or rdir.find('./') > 0:
            rdir = None
        # now put path back together
        path = '/'.join([dir,base])
        if self.debug > 1:
            print("raw=%s rdir=%s dir=%s path=%s" % (raw, rdir, dir, path), file=self.debug_out)
        if op in 'RWS':
            if path in [self.last_dir, cwd, self.cwd, self.curdir]:
                if self.debug > 1:
                    print("skipping:", path, file=self.debug_out)
                return
            if os.path.isdir(path):
                # a directory is only a hint for resolving
                # subsequent relative paths
                if op in 'RW':
                    self.last_dir = path
                if self.debug > 1:
                    print("ldir=", self.last_dir, file=self.debug_out)
                return

        if op in 'ERW':
            # finally, we get down to it
            if dir == self.cwd or dir == self.curdir:
                return
            srctop = self.find_top(path, self.srctops)
            if srctop:
                if self.dpdeps:
                    self.add(self.file_deps, path.replace(srctop,''), 'file')
                self.add(self.src_deps, dir.replace(srctop,''), 'src')
                self.seenit(raw)
                self.seenit(dir)
                if rdir and not rdir.startswith(srctop):
                    dir = rdir      # for below
                    rdir = None
                else:
                    return

            objroot = None
            # NOTE: dir is rebound by this loop; when nothing matches
            # it ends up as the last candidate tried, which may be None.
            for dir in [dir,rdir]:
                if not dir:
                    continue
                objroot = self.find_top(dir, self.objroots)
                if objroot:
                    break
            if objroot:
                ddep = self.find_obj(objroot, dir, path, raw)
                if ddep:
                    self.add(self.obj_deps, ddep, 'obj')
                    if self.dpdeps and objroot.endswith('/stage/'):
                        sp = '/'.join(path.replace(objroot,'').split('/')[1:])
                        self.add(self.file_deps, sp, 'file')
            else:
                # don't waste time looking again
                self.seenit(raw)
                self.seenit(dir)
558
559
def main(argv, klass=MetaFile, xopts='', xoptf=None):
    """Simple driver for class MetaFile.

    Usage:
        script [options] [key=value ...] "meta" ...

    Options and key=value pairs contribute to the
    dictionary passed to MetaFile.

    -S "SRCTOP"
                add "SRCTOP" to the "SRCTOPS" list.

    -C "CURDIR"

    -O "OBJROOT"
                add "OBJROOT" to the "OBJROOTS" list.

    -R "RELDIR"

    -m "MACHINE"

    -a "MACHINE_ARCH"

    -H "HOST_TARGET"

    -T "TARGET_SPEC"

    -D "DPDEPS"

    -X "EXCLUDE"
                add "EXCLUDE" to the "EXCLUDES" list.

    -q  do not print the results

    -d  bumps debug level

    Arguments ending in '.meta' are parsed as meta files, an argument
    of the form '@file' names a file listing meta files to parse.
    Meta files that do not exist are skipped.

    argv    the complete argument vector, argv[0] is ignored.
    klass   the class to instantiate for each meta file.
    xopts   extra getopt option letters.
    xoptf   called as xoptf(option, value, conf) for any option not
            handled here.

    Returns the last klass instance created, or None.
    """
    import getopt

    # import Psyco if we can
    # it can speed things up quite a bit
    have_psyco = 0
    try:
        import psyco
        psyco.full()
        have_psyco = 1
    except Exception:
        # purely an optimization, carry on without it
        pass

    conf = {
        'SRCTOPS': [],
        'OBJROOTS': [],
        'EXCLUDES': [],
        }

    # Seed the configuration from the environment.  Each variable is
    # looked up on its own, so a missing one does not hide the others.
    for env_name in ('MACHINE', 'MACHINE_ARCH'):
        value = os.environ.get(env_name)
        if value:
            conf[env_name] = value
    for env_name, key in (('SB_SRC', 'SRCTOPS'),
                          ('SB_OBJROOT', 'OBJROOTS')):
        value = os.environ.get(env_name)
        if value:
            conf[key].append(value)

    debug = 0
    output = True

    opts, args = getopt.getopt(argv[1:], 'a:dS:C:O:R:m:D:H:qT:X:' + xopts)
    for o, a in opts:
        if o == '-a':
            conf['MACHINE_ARCH'] = a
        elif o == '-d':
            debug += 1
        elif o == '-q':
            output = False
        elif o == '-H':
            conf['HOST_TARGET'] = a
        elif o == '-S':
            if a not in conf['SRCTOPS']:
                conf['SRCTOPS'].append(a)
        elif o == '-C':
            conf['CURDIR'] = a
        elif o == '-O':
            if a not in conf['OBJROOTS']:
                conf['OBJROOTS'].append(a)
        elif o == '-R':
            conf['RELDIR'] = a
        elif o == '-D':
            conf['DPDEPS'] = a
        elif o == '-m':
            conf['MACHINE'] = a
        elif o == '-T':
            conf['TARGET_SPEC'] = a
        elif o == '-X':
            if a not in conf['EXCLUDES']:
                conf['EXCLUDES'].append(a)
        elif xoptf:
            xoptf(o, a, conf)

    conf['debug'] = debug

    # get any var=val assignments; they must precede the meta files
    eaten = 0
    for a in args:
        if a.find('=') <= 0:
            break
        # split on the first '=' only, the value may contain more
        k, v = a.split('=', 1)
        if k in ['SRCTOP','OBJROOT','SRCTOPS','OBJROOTS']:
            if k == 'SRCTOP':
                k = 'SRCTOPS'
            elif k == 'OBJROOT':
                k = 'OBJROOTS'
            if v not in conf[k]:
                conf[k].append(v)
        else:
            conf[k] = v
        eaten += 1

    args = args[eaten:]

    debug_out = getv(conf, 'debug_out', sys.stderr)

    if debug:
        print("config:", file=debug_out)
        print("psyco=", have_psyco, file=debug_out)
        for k,v in list(conf.items()):
            print("%s=%s" % (k,v), file=debug_out)

    m = None
    for a in args:
        if a.endswith('.meta'):
            if not os.path.exists(a):
                continue
            m = klass(a, conf)
        elif a.startswith('@'):
            # there can actually be multiple files per line
            with open(a[1:]) as listing:
                for line in listing:
                    for f in line.strip().split():
                        if not os.path.exists(f):
                            continue
                        m = klass(f, conf)

    if output and m:
        print(m.dirdeps())

        print(m.src_dirdeps('\nsrc:'))

        dpdeps = getv(conf, 'DPDEPS')
        if dpdeps:
            # text mode: print() cannot write str to a binary file
            with open(dpdeps, 'w') as out:
                m.file_depends(out)

    return m
712
if __name__ == '__main__':
    # Run the driver; any failure is summarised first and then
    # propagated unchanged.
    try:
        main(sys.argv)
    except:
        # yes, this goes to stdout
        failure = sys.exc_info()[1]
        print("ERROR: ", failure)
        raise
720
721