xref: /freebsd/share/mk/meta2deps.py (revision ab00ac327a66a53edaac95b536b209db3ae2cd9f)
1#!/usr/bin/env python
2
3from __future__ import print_function
4
5"""
6This script parses each "meta" file and extracts the
7information needed to deduce build and src dependencies.
8
9It works much the same as the original shell script, but is
10*much* more efficient.
11
12The parsing work is handled by the class MetaFile.
13We only pay attention to a subset of the information in the
14"meta" files.  Specifically:
15
'CWD'	to initialize our notion of the current working directory.
17
18'C'	to track chdir(2) on a per process basis
19
20'R'	files read are what we really care about.
21	directories read, provide a clue to resolving
22	subsequent relative paths.  That is if we cannot find
23	them relative to 'cwd', we check relative to the last
24	dir read.
25
26'W'	files opened for write or read-write,
27	for filemon V3 and earlier.
28
29'E'	files executed.
30
31'L'	files linked
32
33'V'	the filemon version, this record is used as a clue
34	that we have reached the interesting bit.
35
36"""
37
38"""
39RCSid:
40	$FreeBSD$
41	$Id: meta2deps.py,v 1.24 2017/02/08 22:17:10 sjg Exp $
42
43	Copyright (c) 2011-2013, Juniper Networks, Inc.
44	All rights reserved.
45
46	Redistribution and use in source and binary forms, with or without
47	modification, are permitted provided that the following conditions
48	are met:
49	1. Redistributions of source code must retain the above copyright
50	   notice, this list of conditions and the following disclaimer.
51	2. Redistributions in binary form must reproduce the above copyright
52	   notice, this list of conditions and the following disclaimer in the
53	   documentation and/or other materials provided with the distribution.
54
55	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
56	"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
57	LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
58	A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
59	OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
60	SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
61	LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
62	DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
63	THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
64	(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
65	OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
66
67"""
68
69import os, re, sys
70
def getv(dict, key, d=None):
    """Return dict[key] when key is present, otherwise the fallback d.

    A stored value is returned as-is, even if it is None or otherwise
    falsy; d is only used when the key is absent.
    """
    return dict[key] if key in dict else d
76
def resolve(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Return an absolute path, resolving via cwd or last_dir if needed.

    path      the path as recorded; a trailing '/.' is dropped.
    cwd       directory used for '.', './...' and as the final candidate
              for other relative paths.
    last_dir  optional directory tried before cwd for relative paths
              (ignored when it equals cwd).
    debug     when > 2, each candidate probed is reported on debug_out.

    Absolute paths, '.' and './...' are answered without touching the
    filesystem.  Any other relative path is returned joined to the first
    candidate directory under which it exists; None is returned when it
    exists under neither.
    """
    if path.endswith('/.'):
        # '/.' is the root directory itself; do not let it collapse to ''
        # (which would then be looked up relative to cwd).
        path = path[:-2] or '/'
    if path.startswith('/'):
        return path
    if path == '.':
        return cwd
    if path.startswith('./'):
        return cwd + path[1:]
    if last_dir == cwd:
        # no point probing the same directory twice
        last_dir = None
    for d in [last_dir, cwd]:
        if not d:
            continue
        p = '/'.join([d,path])
        if debug > 2:
            print("looking for:", p, end=' ', file=debug_out)
        if not os.path.exists(p):
            if debug > 2:
                print("nope", file=debug_out)
            continue
        if debug > 2:
            print("found:", p, file=debug_out)
        return p
    return None
106
def cleanpath(path):
    """cleanup path without using realpath(3)

    Collapses empty components, '.' and '..' purely textually (symlinks
    are not consulted).  A leading '/' is preserved.

    A '..' that has nothing left to cancel is handled the way the
    filesystem would: for an absolute path it stays at the root, for a
    relative path it is kept as a leading '..' component.
    """
    if path.startswith('/'):
        r = '/'
    else:
        r = ''
    p = []
    for d in path.split('/'):
        if not d or d == '.':
            continue
        if d == '..':
            if p and p[-1] != '..':
                p.pop()
            elif not r:
                # relative path climbing above its start: keep the '..'
                p.append(d)
            # else: absolute path, '..' of the root is the root
            continue
        p.append(d)

    return r + '/'.join(p)
124
def abspath(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Resolve path against cwd/last_dir and tidy the result when needed.

    The path is first handed to resolve(); if that yields nothing the
    path is used as given.  Because this is called very often,
    realpath is never used: the result is only passed through
    cleanpath() when it contains no '/' at all, contains './' somewhere
    after its first character, or ends in '/..'.
    """
    resolved = resolve(path, cwd, last_dir, debug, debug_out)
    candidate = resolved if resolved else path

    lacks_slash = '/' not in candidate
    has_inner_dot = candidate.find('./') > 0
    ends_with_parent = candidate.endswith('/..')

    if lacks_slash or has_inner_dot or ends_with_parent:
        return cleanpath(candidate)
    return candidate
138
139def sort_unique(list, cmp=None, key=None, reverse=False):
140    list.sort(cmp, key, reverse)
141    nl = []
142    le = None
143    for e in list:
144        if e == le:
145            continue
146        le = e
147        nl.append(e)
148    return nl
149
def add_trims(x):
    """Return the four variants of x with and without a '/' on each side.

    The order is: '/x/', '/x', 'x/', 'x'.
    """
    variants = []
    for lead in ('/', ''):
        for tail in ('/', ''):
            variants.append(lead + x + tail)
    return variants
155
class MetaFile:
    """class to parse meta files generated by bmake.

    Parsing a meta file adds the directories it depended on to
    obj_deps (dependencies found under an object root) and src_deps
    (dependencies found under a source top), and optionally the
    individual files to file_deps.
    """

    # NOTE: the containers below are class attributes and are mutated in
    # place (append / item assignment), so they are shared by every
    # instance until reset() rebinds them on a particular instance.
    # The driver in this file relies on that: it creates one instance per
    # meta file and reports only from the last one.
    conf = None
    dirdep_re = None
    host_target = None
    srctops = []            # source tree tops, each ending in '/'
    objroots = []           # object tree roots, each ending in '/'
    excludes = []           # path prefixes to ignore
    seen = {}               # paths/dirs we need not look at again
    obj_deps = []           # DIRDEPS found via the object roots
    src_deps = []           # SRC_DIRDEPS found via the source tops
    file_deps = []          # per-file dependencies (only with DPDEPS)

    def __init__(self, name, conf={}):
        """if name is set we will parse it now.
        conf can have the following keys:

        SRCTOPS list of tops of the src tree(s).

        CURDIR  the src directory 'bmake' was run from.

        RELDIR  the relative path from SRCTOP to CURDIR

        MACHINE the machine we built for.
                set to 'none' if we are not cross-building.
                More specifically if machine cannot be deduced from objdirs.

        TARGET_SPEC
                Sometimes MACHINE isn't enough.

        HOST_TARGET
                when we build for the pseudo machine 'host'
                the object tree uses HOST_TARGET rather than MACHINE.

        OBJROOTS a list of the common prefix for all obj dirs it might
                end in '/' or '-'.

        DPDEPS  names an optional file to which per file dependencies
                will be written.
                For example if 'some/path/foo.h' is read from SRCTOP
                then 'DPDEPS_some/path/foo.h +=' "RELDIR" is output.
                This can allow 'bmake' to learn all the dirs within
                the tree that depend on 'foo.h'

        EXCLUDES
                A list of paths to ignore.
                ccache(1) can otherwise be trouble.

        debug   desired debug level

        debug_out open file to send debug output to (sys.stderr)

        conf is only read, never modified.
        """

        self.name = name
        self.debug = getv(conf, 'debug', 0)
        self.debug_out = getv(conf, 'debug_out', sys.stderr)

        self.machine = getv(conf, 'MACHINE', '')
        self.machine_arch = getv(conf, 'MACHINE_ARCH', '')
        self.target_spec = getv(conf, 'TARGET_SPEC', '')
        self.curdir = getv(conf, 'CURDIR')
        self.reldir = getv(conf, 'RELDIR')
        self.dpdeps = getv(conf, 'DPDEPS')
        self.line = 0

        if not self.conf:
            # some of the steps below we want to do only once
            # NOTE(review): the assignments through self below create
            # instance attributes, so this test is true again for each
            # new instance; the shared srctops/objroots lists are kept
            # free of duplicates by the membership tests instead.
            self.conf = conf
            self.host_target = getv(conf, 'HOST_TARGET')
            for srctop in getv(conf, 'SRCTOPS', []):
                if not srctop:
                    continue            # nothing usable
                if srctop[-1] != '/':
                    srctop += '/'
                if not srctop in self.srctops:
                    self.srctops.append(srctop)
                # also remember where the tree really lives
                _srctop = os.path.realpath(srctop)
                if _srctop[-1] != '/':
                    _srctop += '/'
                if not _srctop in self.srctops:
                    self.srctops.append(_srctop)

            # suffixes to strip from an objroot that already includes
            # the machine; empty/unset values are skipped since an empty
            # suffix would match everything and wipe out the objroot.
            trim_list = []
            if self.machine:
                trim_list += add_trims(self.machine)
            if self.machine == 'host' and self.host_target:
                trim_list += add_trims(self.host_target)
            if self.target_spec:
                trim_list += add_trims(self.target_spec)

            for objroot in getv(conf, 'OBJROOTS', []):
                for e in trim_list:
                    if objroot.endswith(e):
                        # this is not what we want - fix it
                        objroot = objroot[0:-len(e)]

                if not objroot:
                    continue            # nothing left to match against
                if objroot[-1] != '/':
                    objroot += '/'
                if not objroot in self.objroots:
                    self.objroots.append(objroot)
                    _objroot = os.path.realpath(objroot)
                    if _objroot[-1] != '/':
                        _objroot += '/'
                    if not _objroot in self.objroots:
                        self.objroots.append(_objroot)

            # we want the longest match
            self.srctops.sort(reverse=True)
            self.objroots.sort(reverse=True)

            self.excludes = getv(conf, 'EXCLUDES', [])

            if self.debug:
                print("host_target=", self.host_target, file=self.debug_out)
                print("srctops=", self.srctops, file=self.debug_out)
                print("objroots=", self.objroots, file=self.debug_out)
                print("excludes=", self.excludes, file=self.debug_out)

            # splits "machine/rest/of/path"
            self.dirdep_re = re.compile(r'([^/]+)/(.+)')

        if self.dpdeps and not self.reldir:
            # per-file dependencies need RELDIR, try to work it out
            if self.debug:
                print("need reldir:", end=' ', file=self.debug_out)
            if self.curdir:
                srctop = self.find_top(self.curdir, self.srctops)
                if srctop:
                    self.reldir = self.curdir.replace(srctop,'')
                    if self.debug:
                        print(self.reldir, file=self.debug_out)
            if not self.reldir:
                self.dpdeps = None      # we cannot do it?

        self.cwd = os.getcwd()          # make sure this is initialized
        self.last_dir = self.cwd

        if name:
            self.try_parse()

    def reset(self):
        """reset state if we are being passed meta files from multiple directories."""
        self.seen = {}
        self.obj_deps = []
        self.src_deps = []
        self.file_deps = []

    def dirdeps(self, sep='\n'):
        """return DIRDEPS

        The entries of obj_deps joined by sep, prefixed with sep
        stripped of surrounding whitespace."""
        return sep.strip() + sep.join(self.obj_deps)

    def src_dirdeps(self, sep='\n'):
        """return SRC_DIRDEPS

        The entries of src_deps joined by sep, prefixed with sep
        stripped of surrounding whitespace."""
        return sep.strip() + sep.join(self.src_deps)

    def file_depends(self, out=None):
        """Write DPDEPS_${file} += ${RELDIR}
        for each file we saw, to the output file.

        out is an open text file; None means sys.stdout.
        Nothing is written if RELDIR is not known."""
        if not self.reldir:
            return None
        for f in sort_unique(self.file_deps):
            print('DPDEPS_%s += %s' % (f, self.reldir), file=out)
        # these entries provide for reverse DIRDEPS lookup
        for f in self.obj_deps:
            print('DEPDIRS_%s += %s' % (f, self.reldir), file=out)

    def seenit(self, dir):
        """remember that we have seen dir."""
        self.seen[dir] = 1

    def add(self, list, data, clue=''):
        """add data to list if it isn't already there.

        clue is only used to label the debug output."""
        if data not in list:
            list.append(data)
            if self.debug:
                print("%s: %sAdd: %s" % (self.name, clue, data), file=self.debug_out)

    def find_top(self, path, list):
        """the logical tree may be split across multiple trees

        Return the first entry of list that path starts with, or None."""
        for top in list:
            if path.startswith(top):
                if self.debug > 2:
                    print("found in", top, file=self.debug_out)
                return top
        return None

    def find_obj(self, objroot, dir, path, input):
        """return path within objroot, taking care of .dirdep files

        objroot  the object root that dir was found under.
        dir      directory containing the file.
        path     the file itself.
        input    the raw path from the meta file; it is remembered as
                 seen when no .dirdep file supplies the answer.

        Returns the dependency directory, with '.machine' appended when
        the object dir belongs to a machine other than ours, or None if
        it cannot be determined.
        """
        ddep = None
        for ddepf in [path + '.dirdep', dir + '/.dirdep']:
            if not ddep and os.path.exists(ddepf):
                with open(ddepf, 'r') as fh:
                    ddep = fh.readline().strip('# \n')
                if self.debug > 1:
                    print("found %s: %s\n" % (ddepf, ddep), file=self.debug_out)
                # drop a trailing ".machine" (or ".target_spec") qualifier;
                # an empty machine must not match, or we would chop
                # the last character off the directory name.
                if self.machine and ddep.endswith(self.machine):
                    ddep = ddep[0:-(1+len(self.machine))]
                elif self.target_spec and ddep.endswith(self.target_spec):
                    ddep = ddep[0:-(1+len(self.target_spec))]

        if not ddep:
            # no .dirdeps, so remember that we've seen the raw input
            self.seenit(input)
            self.seenit(dir)
            if self.machine == 'none':
                if dir.startswith(objroot):
                    return dir.replace(objroot,'')
                return None
            # below objroot we expect "machine/dir"
            m = self.dirdep_re.match(dir.replace(objroot,''))
            if m:
                ddep = m.group(2)
                dmachine = m.group(1)
                if dmachine != self.machine:
                    if not (self.machine == 'host' and
                            dmachine == self.host_target):
                        if self.debug > 2:
                            print("adding .%s to %s" % (dmachine, ddep), file=self.debug_out)
                        ddep += '.' + dmachine

        return ddep

    def try_parse(self, name=None, file=None):
        """give file and line number causing exception

        Calls parse(); on any exception the meta file name and current
        line number are written to sys.stderr and the exception is
        re-raised unchanged."""
        try:
            self.parse(name, file)
        except:
            # give a useful clue
            print('{}:{}: '.format(self.name, self.line), end=' ', file=sys.stderr)
            raise

    def parse(self, name=None, file=None):
        """A meta file looks like:

        # Meta data file "path"
        CMD "command-line"
        CWD "cwd"
        TARGET "target"
        -- command output --
        -- filemon acquired metadata --
        # buildmon version 3
        V 3
        C "pid" "cwd"
        E "pid" "path"
        F "pid" "child"
        R "pid" "path"
        W "pid" "path"
        X "pid" "status"
        D "pid" "path"
        L "pid" "src" "target"
        M "pid" "old" "new"
        S "pid" "path"
        # Bye bye

        We go to some effort to avoid processing a dependency more than once.
        Of the above record types only C,E,F,L,R and V are looked at;
        lines starting with any other letter (including W) are skipped.

        name  if given, replaces self.name as the file to read.
        file  if given, an already open file (or other iterable of
              lines) to read instead of opening self.name; it is left
              open for the caller.
        """

        version = 0                     # unknown
        if name:
            self.name = name
        cwd = self.cwd                  # until a record tells us otherwise
        if file:
            f = file
            cwd = self.last_dir = self.cwd
        else:
            f = open(self.name, 'r')
        skip = True
        pid_cwd = {}
        pid_last_dir = {}
        last_pid = 0

        self.line = 0
        if self.curdir:
            self.seenit(self.curdir)    # we ignore this

        interesting = 'CEFLRV'
        try:
            for line in f:
                self.line += 1
                # ignore anything we don't care about
                if line[0] not in interesting:
                    continue
                if self.debug > 2:
                    print("input:", line, end=' ', file=self.debug_out)
                w = line.split()

                if skip:
                    # still in the header, the 'V' record marks the
                    # start of the interesting bit
                    if w[0] == 'V':
                        skip = False
                        version = int(w[1])
                        # disabled:
                        # if version < 4:
                        #     # we cannot ignore 'W' records
                        #     # as they may be 'rw'
                        #     interesting += 'W'
                    elif w[0] == 'CWD':
                        self.cwd = cwd = self.last_dir = w[1]
                        self.seenit(cwd)    # ignore this
                        if self.debug:
                            print("%s: CWD=%s" % (self.name, cwd), file=self.debug_out)
                    continue

                pid = int(w[1])
                if pid != last_pid:
                    # switching process: save state of the previous
                    # one and pick up the state of this one
                    if last_pid:
                        pid_last_dir[last_pid] = self.last_dir
                    cwd = getv(pid_cwd, pid, self.cwd)
                    self.last_dir = getv(pid_last_dir, pid, self.cwd)
                    last_pid = pid

                # process operations
                if w[0] == 'F':
                    # child starts out in the parent's directory
                    npid = int(w[2])
                    pid_cwd[npid] = cwd
                    pid_last_dir[npid] = cwd
                    last_pid = npid
                    continue
                elif w[0] == 'C':
                    cwd = abspath(w[2], cwd, None, self.debug, self.debug_out)
                    if cwd.endswith('/.'):
                        cwd = cwd[0:-2]
                    self.last_dir = pid_last_dir[pid] = cwd
                    pid_cwd[pid] = cwd
                    if self.debug > 1:
                        print("cwd=", cwd, file=self.debug_out)
                    continue

                if w[2] in self.seen:
                    if self.debug > 2:
                        print("seen:", w[2], file=self.debug_out)
                    continue
                # file operations
                if w[0] in 'ML':
                    # these are special, treat src as read and
                    # target as write.
                    # w[1] is the pid, the two paths follow it.
                    self.parse_path(w[2].strip("'"), cwd, 'R', w)
                    self.parse_path(w[3].strip("'"), cwd, 'W', w)
                    continue
                elif w[0] in 'ERWS':
                    path = w[2]
                    self.parse_path(path, cwd, w[0], w)
        finally:
            # only close what we opened, even if parsing blew up
            if not file:
                f.close()

    def is_src(self, base, dir, rdir):
        """is base in srctop

        base  file name
        dir   directory containing it
        rdir  the real path of dir if that differs, else None

        The first of dir, rdir which places base under one of the
        srctops is added to src_deps (relative to that srctop) and
        marked as seen, and True is returned; otherwise False."""
        for d in [dir,rdir]:
            if not d:
                continue
            path = '/'.join([d,base])
            srctop = self.find_top(path, self.srctops)
            if srctop:
                if self.dpdeps:
                    self.add(self.file_deps, path.replace(srctop,''), 'file')
                self.add(self.src_deps, d.replace(srctop,''), 'src')
                self.seenit(d)
                return True
        return False

    def parse_path(self, path, cwd, op=None, w=[]):
        """look at a path for the op specified

        path  the path to consider
        cwd   current directory of the process that touched it
        op    the operation ('E', 'R', 'W', 'S'); defaults to w[0]
        w     the split record the path came from; w[2] is what gets
              remembered as seen (path itself if w is too short).
        """

        if not op:
            op = w[0]
        # the raw form as it appeared in the record
        if len(w) > 2:
            raw = w[2]
        else:
            raw = path

        # we are never interested in .dirdep files as dependencies
        if path.endswith('.dirdep'):
            return
        for p in self.excludes:
            if p and path.startswith(p):
                if self.debug > 2:
                    print("exclude:", p, path, file=self.debug_out)
                return
        # we don't want to resolve the last component if it is
        # a symlink
        path = resolve(path, cwd, self.last_dir, self.debug, self.debug_out)
        if not path:
            return
        dir,base = os.path.split(path)
        if dir in self.seen:
            if self.debug > 2:
                print("seen:", dir, file=self.debug_out)
            return
        # we can have a path in an objdir which is a link
        # to the src dir, we may need to add dependencies for each
        dir = abspath(dir, cwd, self.last_dir, self.debug, self.debug_out)
        rdir = os.path.realpath(dir)
        if rdir == dir:
            rdir = None
        # now put path back together
        path = '/'.join([dir,base])
        if self.debug > 1:
            print("raw=%s rdir=%s dir=%s path=%s" % (raw, rdir, dir, path), file=self.debug_out)
        if op in 'RWS':
            if path in [self.last_dir, cwd, self.cwd, self.curdir]:
                if self.debug > 1:
                    print("skipping:", path, file=self.debug_out)
                return
            if os.path.isdir(path):
                # a directory is not a dependency, but it is a clue
                # for resolving later relative paths
                if op in 'RW':
                    self.last_dir = path
                if self.debug > 1:
                    print("ldir=", self.last_dir, file=self.debug_out)
                return

        if op in 'ERW':
            # finally, we get down to it
            if dir == self.cwd or dir == self.curdir:
                return
            if self.is_src(base, dir, rdir):
                self.seenit(raw)
                if not rdir:
                    return

            objroot = None
            odir = dir
            for odir in [dir,rdir]:
                if not odir:
                    continue
                objroot = self.find_top(odir, self.objroots)
                if objroot:
                    break
            if objroot:
                # odir is whichever of dir/rdir is under objroot
                ddep = self.find_obj(objroot, odir, path, raw)
                if ddep:
                    self.add(self.obj_deps, ddep, 'obj')
                    if self.dpdeps and objroot.endswith('/stage/'):
                        sp = '/'.join(path.replace(objroot,'').split('/')[1:])
                        self.add(self.file_deps, sp, 'file')
            else:
                # don't waste time looking again
                self.seenit(raw)
                # NOTE(review): odir is the last value the loop above
                # looked at, i.e. rdir (possibly None), not dir.
                # Kept as is; confirm whether dir was intended.
                self.seenit(odir)
584
585
def main(argv, klass=MetaFile, xopts='', xoptf=None):
    """Simple driver for class MetaFile.

    Usage:
        script [options] [key=value ...] "meta" ...

    Options and key=value pairs contribute to the
    dictionary passed to MetaFile.

    -S "SRCTOP"
                add "SRCTOP" to the "SRCTOPS" list.

    -C "CURDIR"

    -O "OBJROOT"
                add "OBJROOT" to the "OBJROOTS" list.

    -R "RELDIR"

    -m "MACHINE"

    -a "MACHINE_ARCH"

    -H "HOST_TARGET"

    -T "TARGET_SPEC"

    -X "EXCLUDE"
                add "EXCLUDE" to the "EXCLUDES" list.

    -D "DPDEPS"

    -d  bumps debug level

    -q  do not print the results

    The environment variables MACHINE, MACHINE_ARCH, SB_SRC and
    SB_OBJROOT provide initial values.

    A "meta" argument is either the name of a meta file (must end in
    '.meta') or '@file' naming a file which lists meta files.
    Files that do not exist are silently skipped.

    argv   the full argument vector, argv[0] is ignored.
    klass  the class to instantiate for each meta file.
    xopts  extra getopt option letters to accept.
    xoptf  called as xoptf(opt, arg, conf) for any option not
           handled here.

    Returns the last klass instance created, or None.
    """
    import getopt

    # import Psyco if we can
    # it can speed things up quite a bit
    have_psyco = 0
    try:
        import psyco
        psyco.full()
        have_psyco = 1
    except Exception:
        # purely optional, carry on without it
        pass

    conf = {
        'SRCTOPS': [],
        'OBJROOTS': [],
        'EXCLUDES': [],
        }

    # each variable is looked up on its own, so that one which is
    # missing does not cause the others to be ignored.
    machine = os.environ.get('MACHINE')
    if machine:
        conf['MACHINE'] = machine
    machine_arch = os.environ.get('MACHINE_ARCH')
    if machine_arch:
        conf['MACHINE_ARCH'] = machine_arch
    srctop = os.environ.get('SB_SRC')
    if srctop:
        conf['SRCTOPS'].append(srctop)
    objroot = os.environ.get('SB_OBJROOT')
    if objroot:
        conf['OBJROOTS'].append(objroot)

    debug = 0
    output = True

    opts, args = getopt.getopt(argv[1:], 'a:dS:C:O:R:m:D:H:qT:X:' + xopts)
    for o, a in opts:
        if o == '-a':
            conf['MACHINE_ARCH'] = a
        elif o == '-d':
            debug += 1
        elif o == '-q':
            output = False
        elif o == '-H':
            conf['HOST_TARGET'] = a
        elif o == '-S':
            if a not in conf['SRCTOPS']:
                conf['SRCTOPS'].append(a)
        elif o == '-C':
            conf['CURDIR'] = a
        elif o == '-O':
            if a not in conf['OBJROOTS']:
                conf['OBJROOTS'].append(a)
        elif o == '-R':
            conf['RELDIR'] = a
        elif o == '-D':
            conf['DPDEPS'] = a
        elif o == '-m':
            conf['MACHINE'] = a
        elif o == '-T':
            conf['TARGET_SPEC'] = a
        elif o == '-X':
            if a not in conf['EXCLUDES']:
                conf['EXCLUDES'].append(a)
        elif xoptf:
            xoptf(o, a, conf)

    conf['debug'] = debug

    # get any var=val assignments
    # (only leading ones, we stop at the first arg that is not one)
    eaten = []
    for a in args:
        if a.find('=') > 0:
            # split on the first '=' only, the value may contain more
            k,v = a.split('=', 1)
            if k in ['SRCTOP','OBJROOT','SRCTOPS','OBJROOTS']:
                if k == 'SRCTOP':
                    k = 'SRCTOPS'
                elif k == 'OBJROOT':
                    k = 'OBJROOTS'
                if v not in conf[k]:
                    conf[k].append(v)
            else:
                conf[k] = v
            eaten.append(a)
            continue
        break

    for a in eaten:
        args.remove(a)

    debug_out = getv(conf, 'debug_out', sys.stderr)

    if debug:
        print("config:", file=debug_out)
        print("psyco=", have_psyco, file=debug_out)
        for k,v in list(conf.items()):
            print("%s=%s" % (k,v), file=debug_out)

    m = None
    for a in args:
        if a.endswith('.meta'):
            if not os.path.exists(a):
                continue
            m = klass(a, conf)
        elif a.startswith('@'):
            # there can actually multiple files per line
            with open(a[1:]) as flist:
                for line in flist:
                    for f in line.strip().split():
                        if not os.path.exists(f):
                            continue
                        m = klass(f, conf)

    if output and m:
        print(m.dirdeps())

        print(m.src_dirdeps('\nsrc:'))

        dpdeps = getv(conf, 'DPDEPS')
        if dpdeps:
            # text mode: print() cannot write str to a binary file on
            # Python 3.  As before, any previous content is replaced.
            with open(dpdeps, 'w') as out:
                m.file_depends(out)

    return m
738
if __name__ == '__main__':
    # Run the driver on the command line.  Any failure is announced
    # before being re-raised so that the usual traceback still follows.
    try:
        main(sys.argv)
    except:
        failure = sys.exc_info()[1]
        # yes, this goes to stdout
        print("ERROR: ", failure)
        raise
746
747