xref: /freebsd/share/mk/meta2deps.py (revision 732a02b4e77866604a120a275c082bb6221bd2ff)
1#!/usr/bin/env python
2
3from __future__ import print_function
4
5"""
6This script parses each "meta" file and extracts the
7information needed to deduce build and src dependencies.
8
9It works much the same as the original shell script, but is
10*much* more efficient.
11
12The parsing work is handled by the class MetaFile.
13We only pay attention to a subset of the information in the
14"meta" files.  Specifically:
15
16'CWD'	to initialize our notion.
17
18'C'	to track chdir(2) on a per process basis
19
20'R'	files read are what we really care about.
21	directories read, provide a clue to resolving
22	subsequent relative paths.  That is if we cannot find
23	them relative to 'cwd', we check relative to the last
24	dir read.
25
26'W'	files opened for write or read-write,
27	for filemon V3 and earlier.
28
29'E'	files executed.
30
31'L'	files linked
32
33'V'	the filemon version, this record is used as a clue
34	that we have reached the interesting bit.
35
36"""
37
38"""
39RCSid:
40	$FreeBSD$
41	$Id: meta2deps.py,v 1.27 2017/05/24 00:04:04 sjg Exp $
42
43	Copyright (c) 2011-2013, Juniper Networks, Inc.
44	All rights reserved.
45
46	Redistribution and use in source and binary forms, with or without
47	modification, are permitted provided that the following conditions
48	are met:
49	1. Redistributions of source code must retain the above copyright
50	   notice, this list of conditions and the following disclaimer.
51	2. Redistributions in binary form must reproduce the above copyright
52	   notice, this list of conditions and the following disclaimer in the
53	   documentation and/or other materials provided with the distribution.
54
55	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
56	"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
57	LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
58	A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
59	OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
60	SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
61	LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
62	DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
63	THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
64	(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
65	OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
66
67"""
68
69import os, re, sys
70
def getv(dict, key, d=None):
    """Return the value stored under key in dict, or d when key is absent."""
    return dict.get(key, d)
76
def resolve(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Turn path into an absolute path using cwd and, optionally, last_dir.

    A trailing '/.' is dropped first.  Absolute paths are returned as is,
    '.' maps to cwd and a leading './' is taken relative to cwd.
    Anything else is tried relative to last_dir (unless it equals cwd)
    and then relative to cwd; the first candidate that exists on disk
    wins.  A bare '..' yields the parent of the first usable base
    directory without touching the file system.

    Returns None when nothing could be resolved.
    Progress is reported on debug_out when debug > 2.
    """
    verbose = debug > 2

    if path.endswith('/.'):
        path = path[:-2]
    if path.startswith('/'):
        return path
    if path == '.':
        return cwd
    if path.startswith('./'):
        return cwd + path[1:]

    # no point in probing the same directory twice
    bases = [cwd] if last_dir == cwd else [last_dir, cwd]
    for base in bases:
        if not base:
            continue
        if path == '..':
            parent = base.rpartition('/')[0]
            return parent or '/'
        candidate = base + '/' + path
        if verbose:
            print("looking for:", candidate, end=' ', file=debug_out)
        if os.path.exists(candidate):
            if verbose:
                print("found:", candidate, file=debug_out)
            return candidate
        if verbose:
            print("nope", file=debug_out)
    return None
112
def cleanpath(path):
    """
    Normalize path textually, without using realpath(3).

    Empty and '.' components are dropped and each '..' removes the
    component before it.  A '..' with nothing left to remove stops
    processing, so the remainder of the path is discarded.
    A leading '/' is preserved.
    """
    prefix = '/' if path.startswith('/') else ''
    kept = []
    for part in path.split('/'):
        if part in ('', '.'):
            continue
        if part != '..':
            kept.append(part)
        elif kept:
            kept.pop()
        else:
            # '..' would climb above the start of the path: give up here
            break
    return prefix + '/'.join(kept)
133
def abspath(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Return an absolute, tidied version of path.

    The path is first handed to resolve(); if that fails the path is
    used unchanged.  Because this is called very often, realpath is
    avoided: cleanpath() is applied only when the result has no '/' at
    all, contains './' somewhere past its first character, or ends
    in '/..'.
    """
    resolved = resolve(path, cwd, last_dir, debug, debug_out)
    result = resolved if resolved else path
    needs_cleaning = ('/' not in result
                      or result.find('./') > 0
                      or result.endswith('/..'))
    return cleanpath(result) if needs_cleaning else result
147
148def sort_unique(list, cmp=None, key=None, reverse=False):
149    list.sort(cmp, key, reverse)
150    nl = []
151    le = None
152    for e in list:
153        if e == le:
154            continue
155        le = e
156        nl.append(e)
157    return nl
158
def add_trims(x):
    """
    Return the four variants of x used for trimming path suffixes:
    '/x/', '/x', 'x/' and 'x', in that order.
    """
    return [lead + x + trail
            for lead in ('/', '')
            for trail in ('/', '')]
164
class MetaFile:
    """Parse meta files generated by bmake and collect dependencies.

    The containers below are class attributes.  add() and seenit()
    modify them in place, so until reset() rebinds them on an instance
    they are shared by every MetaFile: results accumulate across the
    meta files processed and can be read from the last instance.
    """

    conf = None
    dirdep_re = None
    host_target = None
    srctops = []
    objroots = []
    excludes = []
    seen = {}
    obj_deps = []
    src_deps = []
    file_deps = []

    def __init__(self, name, conf={}):
        """if name is set we will parse it now.
        conf can have the following keys:

        SRCTOPS list of tops of the src tree(s).

        CURDIR  the src directory 'bmake' was run from.

        RELDIR  the relative path from SRCTOP to CURDIR

        MACHINE the machine we built for.
                set to 'none' if we are not cross-building.
                More specifically if machine cannot be deduced from objdirs.

        TARGET_SPEC
                Sometimes MACHINE isn't enough.

        HOST_TARGET
                when we build for the pseudo machine 'host'
                the object tree uses HOST_TARGET rather than MACHINE.

        OBJROOTS a list of the common prefix for all obj dirs it might
                end in '/' or '-'.

        DPDEPS  names an optional file to which per file dependencies
                will be appended.
                For example if 'some/path/foo.h' is read from SRCTOP
                then 'DPDEPS_some/path/foo.h +=' "RELDIR" is output.
                This can allow 'bmake' to learn all the dirs within
                the tree that depend on 'foo.h'

        EXCLUDES
                A list of paths to ignore.
                ccache(1) can otherwise be trouble.

        debug   desired debug level

        debug_out open file to send debug output to (sys.stderr)

        """

        self.name = name
        self.debug = getv(conf, 'debug', 0)
        self.debug_out = getv(conf, 'debug_out', sys.stderr)

        self.machine = getv(conf, 'MACHINE', '')
        self.machine_arch = getv(conf, 'MACHINE_ARCH', '')
        self.target_spec = getv(conf, 'TARGET_SPEC', '')
        self.curdir = getv(conf, 'CURDIR')
        self.reldir = getv(conf, 'RELDIR')
        self.dpdeps = getv(conf, 'DPDEPS')
        self.line = 0

        if not self.conf:
            # some of the steps below we want to do only once
            # NOTE(review): self.conf is bound on the instance, so a new
            # instance still sees the class-level None and repeats this
            # block; the membership tests keep the shared lists free of
            # duplicates.
            self.conf = conf
            self.host_target = getv(conf, 'HOST_TARGET')
            for srctop in getv(conf, 'SRCTOPS', []):
                if not srctop:
                    continue            # ignore empty entries
                if not srctop.endswith('/'):
                    srctop += '/'
                if not srctop in self.srctops:
                    self.srctops.append(srctop)
                _srctop = os.path.realpath(srctop)
                if not _srctop.endswith('/'):
                    _srctop += '/'
                if not _srctop in self.srctops:
                    self.srctops.append(_srctop)

            trim_list = add_trims(self.machine)
            if self.machine == 'host' and self.host_target:
                trim_list += add_trims(self.host_target)
            if self.target_spec:
                trim_list += add_trims(self.target_spec)

            for objroot in getv(conf, 'OBJROOTS', []):
                if not objroot:
                    continue            # ignore empty entries
                for e in trim_list:
                    # an empty suffix (MACHINE unset) matches everything
                    # and objroot[0:-0] would wipe the whole string
                    if e and objroot.endswith(e):
                        # this is not what we want - fix it
                        objroot = objroot[0:-len(e)]

                if not objroot.endswith('/'):
                    objroot += '/'
                if not objroot in self.objroots:
                    self.objroots.append(objroot)
                    _objroot = os.path.realpath(objroot)
                    if not _objroot.endswith('/'):
                        _objroot += '/'
                    if not _objroot in self.objroots:
                        self.objroots.append(_objroot)

            # we want the longest match
            self.srctops.sort(reverse=True)
            self.objroots.sort(reverse=True)

            self.excludes = getv(conf, 'EXCLUDES', [])

            if self.debug:
                print("host_target=", self.host_target, file=self.debug_out)
                print("srctops=", self.srctops, file=self.debug_out)
                print("objroots=", self.objroots, file=self.debug_out)
                print("excludes=", self.excludes, file=self.debug_out)

            # splits 'machine/some/dir' into machine and directory
            self.dirdep_re = re.compile(r'([^/]+)/(.+)')

        if self.dpdeps and not self.reldir:
            if self.debug:
                print("need reldir:", end=' ', file=self.debug_out)
            if self.curdir:
                srctop = self.find_top(self.curdir, self.srctops)
                if srctop:
                    self.reldir = self.curdir.replace(srctop,'')
                    if self.debug:
                        print(self.reldir, file=self.debug_out)
            if not self.reldir:
                self.dpdeps = None      # we cannot do it?

        self.cwd = os.getcwd()          # make sure this is initialized
        self.last_dir = self.cwd

        if name:
            self.try_parse()

    def reset(self):
        """reset state if we are being passed meta files from multiple directories."""
        # rebinding on the instance detaches it from the shared class lists
        self.seen = {}
        self.obj_deps = []
        self.src_deps = []
        self.file_deps = []

    def dirdeps(self, sep='\n'):
        """return DIRDEPS: the obj_deps joined by sep, prefixed with sep.strip()"""
        return sep.strip() + sep.join(self.obj_deps)

    def src_dirdeps(self, sep='\n'):
        """return SRC_DIRDEPS: the src_deps joined by sep, prefixed with sep.strip()"""
        return sep.strip() + sep.join(self.src_deps)

    def file_depends(self, out=None):
        """Append DPDEPS_${file} += ${RELDIR}
        for each file we saw, to the output file.

        out is an open text file (None means print()'s default, stdout).
        Nothing is written when reldir is not known.
        """
        if not self.reldir:
            return None
        for f in sort_unique(self.file_deps):
            print('DPDEPS_%s += %s' % (f, self.reldir), file=out)
        # these entries provide for reverse DIRDEPS lookup
        for f in self.obj_deps:
            print('DEPDIRS_%s += %s' % (f, self.reldir), file=out)

    def seenit(self, dir):
        """remember that we have seen dir."""
        self.seen[dir] = 1

    def add(self, list, data, clue=''):
        """add data to list if it isn't already there.

        clue is only used to label the debug message."""
        if data not in list:
            list.append(data)
            if self.debug:
                print("%s: %sAdd: %s" % (self.name, clue, data), file=self.debug_out)

    def find_top(self, path, list):
        """the logical tree may be split across multiple trees

        Return the first entry of list that path starts with, or None."""
        for top in list:
            if path.startswith(top):
                if self.debug > 2:
                    print("found in", top, file=self.debug_out)
                return top
        return None

    def find_obj(self, objroot, dir, path, input):
        """return path within objroot, taking care of .dirdep files

        objroot the matching object root (ends in '/').
        dir     directory containing the file.
        path    full path of the file.
        input   the raw path from the meta file record.

        Returns the directory dependency or None.
        """
        ddep = None
        for ddepf in [path + '.dirdep', dir + '/.dirdep']:
            if not ddep and os.path.exists(ddepf):
                # only the first line matters; close the file promptly
                with open(ddepf, 'r') as df:
                    ddep = df.readline().strip('# \n')
                if self.debug > 1:
                    print("found %s: %s\n" % (ddepf, ddep), file=self.debug_out)
                # strip a trailing '.MACHINE' (or '.TARGET_SPEC');
                # an empty machine must not chop off a character
                if self.machine and ddep.endswith(self.machine):
                    ddep = ddep[0:-(1+len(self.machine))]
                elif self.target_spec and ddep.endswith(self.target_spec):
                    ddep = ddep[0:-(1+len(self.target_spec))]

        if not ddep:
            # no .dirdeps, so remember that we've seen the raw input
            self.seenit(input)
            self.seenit(dir)
            if self.machine == 'none':
                if dir.startswith(objroot):
                    return dir.replace(objroot,'')
                return None
            m = self.dirdep_re.match(dir.replace(objroot,''))
            if m:
                ddep = m.group(2)
                dmachine = m.group(1)
                if dmachine != self.machine:
                    if not (self.machine == 'host' and
                            dmachine == self.host_target):
                        if self.debug > 2:
                            print("adding .%s to %s" % (dmachine, ddep), file=self.debug_out)
                        ddep += '.' + dmachine

        return ddep

    def try_parse(self, name=None, file=None):
        """give file and line number causing exception"""
        try:
            self.parse(name, file)
        except:
            # give a useful clue
            print('{}:{}: '.format(self.name, self.line), end=' ', file=sys.stderr)
            raise

    def parse(self, name=None, file=None):
        """A meta file looks like:

        # Meta data file "path"
        CMD "command-line"
        CWD "cwd"
        TARGET "target"
        -- command output --
        -- filemon acquired metadata --
        # buildmon version 3
        V 3
        C "pid" "cwd"
        E "pid" "path"
        F "pid" "child"
        R "pid" "path"
        W "pid" "path"
        X "pid" "status"
        D "pid" "path"
        L "pid" "src" "target"
        M "pid" "old" "new"
        S "pid" "path"
        # Bye bye

        We go to some effort to avoid processing a dependency more than once.
        Of the above record types only C,E,F,L,R,V and W are of interest.
        ('W' handling is currently disabled, see below.)

        name    meta file to read; defaults to self.name.
        file    an already open file to read from instead; it is not
                closed here.
        """

        version = 0                     # unknown
        if name:
            self.name = name
        if file:
            f = file
            cwd = self.last_dir = self.cwd
        else:
            f = open(self.name, 'r')
            cwd = self.cwd              # until we see CWD or a pid
        try:
            skip = True
            pid_cwd = {}
            pid_last_dir = {}
            last_pid = 0

            self.line = 0
            if self.curdir:
                self.seenit(self.curdir)    # we ignore this

            interesting = 'CEFLRV'
            for line in f:
                self.line += 1
                # ignore anything we don't care about
                if not line[0] in interesting:
                    continue
                if self.debug > 2:
                    print("input:", line, end=' ', file=self.debug_out)
                w = line.split()

                if skip:
                    # nothing matters until the 'V' record, except 'CWD'
                    if w[0] == 'V':
                        skip = False
                        version = int(w[1])
                        # disabled:
                        # if version < 4:
                        #     # we cannot ignore 'W' records
                        #     # as they may be 'rw'
                        #     interesting += 'W'
                    elif w[0] == 'CWD':
                        self.cwd = cwd = self.last_dir = w[1]
                        self.seenit(cwd)    # ignore this
                        if self.debug:
                            print("%s: CWD=%s" % (self.name, cwd), file=self.debug_out)
                    continue

                pid = int(w[1])
                if pid != last_pid:
                    # switching process: save and restore per pid state
                    if last_pid:
                        pid_last_dir[last_pid] = self.last_dir
                    cwd = getv(pid_cwd, pid, self.cwd)
                    self.last_dir = getv(pid_last_dir, pid, self.cwd)
                    last_pid = pid

                # process operations
                if w[0] == 'F':
                    # the child inherits the cwd of its parent
                    npid = int(w[2])
                    pid_cwd[npid] = cwd
                    pid_last_dir[npid] = cwd
                    last_pid = npid
                    continue
                elif w[0] == 'C':
                    cwd = abspath(w[2], cwd, None, self.debug, self.debug_out)
                    if cwd.endswith('/.'):
                        cwd = cwd[0:-2]
                    self.last_dir = pid_last_dir[pid] = cwd
                    pid_cwd[pid] = cwd
                    if self.debug > 1:
                        print("cwd=", cwd, file=self.debug_out)
                    continue

                if w[2] in self.seen:
                    if self.debug > 2:
                        print("seen:", w[2], file=self.debug_out)
                    continue
                # file operations
                if w[0] in 'ML':
                    # these are special, treat src as read and
                    # target as write.
                    # w[1] is the pid: src is w[2] and target is w[3]
                    self.parse_path(w[2].strip("'"), cwd, 'R', w)
                    if len(w) > 3:
                        self.parse_path(w[3].strip("'"), cwd, 'W', w)
                    continue
                elif w[0] in 'ERWS':
                    path = w[2]
                    self.parse_path(path, cwd, w[0], w)
        finally:
            # only close what we opened, even if parsing failed
            if not file:
                f.close()

    def is_src(self, base, dir, rdir):
        """is base in srctop

        dir and rdir (the real path of dir, or None) are both tried.
        On a match the src (and optionally file) dependency is
        recorded and True is returned.
        """
        for dir in [dir,rdir]:
            if not dir:
                continue
            path = '/'.join([dir,base])
            srctop = self.find_top(path, self.srctops)
            if srctop:
                if self.dpdeps:
                    self.add(self.file_deps, path.replace(srctop,''), 'file')
                self.add(self.src_deps, dir.replace(srctop,''), 'src')
                self.seenit(dir)
                return True
        return False

    def parse_path(self, path, cwd, op=None, w=[]):
        """look at a path for the op specified

        path    the path from the record.
        cwd     current directory of the process concerned.
        op      the operation; defaults to w[0].
        w       the split record; w[2] is used as the raw path for
                debug output and for remembering what we have seen.
        """

        if not op:
            op = w[0]

        # we are never interested in .dirdep files as dependencies
        if path.endswith('.dirdep'):
            return
        for p in self.excludes:
            if p and path.startswith(p):
                if self.debug > 2:
                    print("exclude:", p, path, file=self.debug_out)
                return
        # we don't want to resolve the last component if it is
        # a symlink
        path = resolve(path, cwd, self.last_dir, self.debug, self.debug_out)
        if not path:
            return
        dir,base = os.path.split(path)
        if dir in self.seen:
            if self.debug > 2:
                print("seen:", dir, file=self.debug_out)
            return
        # we can have a path in an objdir which is a link
        # to the src dir, we may need to add dependencies for each
        dir = abspath(dir, cwd, self.last_dir, self.debug, self.debug_out)
        rdir = os.path.realpath(dir)
        if rdir == dir:
            rdir = None
        # now put path back together
        path = '/'.join([dir,base])
        if self.debug > 1:
            print("raw=%s rdir=%s dir=%s path=%s" % (w[2], rdir, dir, path), file=self.debug_out)
        if op in 'RWS':
            if path in [self.last_dir, cwd, self.cwd, self.curdir]:
                if self.debug > 1:
                    print("skipping:", path, file=self.debug_out)
                return
            if os.path.isdir(path):
                # a directory is only a clue for later relative paths
                if op in 'RW':
                    self.last_dir = path
                if self.debug > 1:
                    print("ldir=", self.last_dir, file=self.debug_out)
                return

        if op in 'ERW':
            # finally, we get down to it
            if dir == self.cwd or dir == self.curdir:
                return
            if self.is_src(base, dir, rdir):
                self.seenit(w[2])
                if not rdir:
                    return

            objroot = None
            for dir in [dir,rdir]:
                if not dir:
                    continue
                objroot = self.find_top(dir, self.objroots)
                if objroot:
                    break
            if objroot:
                ddep = self.find_obj(objroot, dir, path, w[2])
                if ddep:
                    self.add(self.obj_deps, ddep, 'obj')
                    if self.dpdeps and objroot.endswith('/stage/'):
                        sp = '/'.join(path.replace(objroot,'').split('/')[1:])
                        self.add(self.file_deps, sp, 'file')
            else:
                # don't waste time looking again
                self.seenit(w[2])
                self.seenit(dir)
593
594
def main(argv, klass=MetaFile, xopts='', xoptf=None):
    """Simple driver for class MetaFile.

    Usage:
        script [options] [key=value ...] "meta" ...

    Options and key=value pairs contribute to the
    dictionary passed to MetaFile.

    -S "SRCTOP"
                add "SRCTOP" to the "SRCTOPS" list.

    -C "CURDIR"

    -O "OBJROOT"
                add "OBJROOT" to the "OBJROOTS" list.

    -R "RELDIR"

    -m "MACHINE"

    -a "MACHINE_ARCH"

    -T "TARGET_SPEC"

    -H "HOST_TARGET"

    -D "DPDEPS"

    -X "EXCLUDE"
                add "EXCLUDE" to the "EXCLUDES" list.

    -q  do not print the results

    -d  bumps debug level

    MACHINE, MACHINE_ARCH, SB_SRC and SB_OBJROOT from the environment
    provide initial values.

    Arguments ending in '.meta' are parsed directly; an argument of
    the form '@file' names a file listing meta files to parse.
    Meta files that do not exist are skipped.

    argv    the full argument vector (argv[0] is ignored).
    klass   the class to instantiate for each meta file.
    xopts   extra getopt option letters.
    xoptf   called as xoptf(opt, arg, conf) for any option not
            handled here.

    Returns the last klass instance created, or None.
    """
    import getopt

    # import Psyco if we can
    # it can speed things up quite a bit
    have_psyco = 0
    try:
        import psyco
        psyco.full()
        have_psyco = 1
    except Exception:
        pass

    conf = {
        'SRCTOPS': [],
        'OBJROOTS': [],
        'EXCLUDES': [],
        }

    # each variable is optional and looked up independently, so one
    # missing variable does not cause the others to be ignored
    machine = os.environ.get('MACHINE')
    if machine:
        conf['MACHINE'] = machine
    machine_arch = os.environ.get('MACHINE_ARCH')
    if machine_arch:
        conf['MACHINE_ARCH'] = machine_arch
    srctop = os.environ.get('SB_SRC')
    if srctop:
        conf['SRCTOPS'].append(srctop)
    objroot = os.environ.get('SB_OBJROOT')
    if objroot:
        conf['OBJROOTS'].append(objroot)

    debug = 0
    output = True

    opts, args = getopt.getopt(argv[1:], 'a:dS:C:O:R:m:D:H:qT:X:' + xopts)
    for o, a in opts:
        if o == '-a':
            conf['MACHINE_ARCH'] = a
        elif o == '-d':
            debug += 1
        elif o == '-q':
            output = False
        elif o == '-H':
            conf['HOST_TARGET'] = a
        elif o == '-S':
            if a not in conf['SRCTOPS']:
                conf['SRCTOPS'].append(a)
        elif o == '-C':
            conf['CURDIR'] = a
        elif o == '-O':
            if a not in conf['OBJROOTS']:
                conf['OBJROOTS'].append(a)
        elif o == '-R':
            conf['RELDIR'] = a
        elif o == '-D':
            conf['DPDEPS'] = a
        elif o == '-m':
            conf['MACHINE'] = a
        elif o == '-T':
            conf['TARGET_SPEC'] = a
        elif o == '-X':
            if a not in conf['EXCLUDES']:
                conf['EXCLUDES'].append(a)
        elif xoptf:
            xoptf(o, a, conf)

    conf['debug'] = debug

    # get any var=val assignments; they must precede the meta files
    eaten = 0
    for a in args:
        if a.find('=') <= 0:
            break
        # split on the first '=' only, the value may contain more
        k, v = a.split('=', 1)
        if k in ['SRCTOP','OBJROOT','SRCTOPS','OBJROOTS']:
            if k == 'SRCTOP':
                k = 'SRCTOPS'
            elif k == 'OBJROOT':
                k = 'OBJROOTS'
            if v not in conf[k]:
                conf[k].append(v)
        else:
            conf[k] = v
        eaten += 1

    args = args[eaten:]

    debug_out = getv(conf, 'debug_out', sys.stderr)

    if debug:
        print("config:", file=debug_out)
        print("psyco=", have_psyco, file=debug_out)
        for k,v in list(conf.items()):
            print("%s=%s" % (k,v), file=debug_out)

    m = None
    for a in args:
        if a.endswith('.meta'):
            if not os.path.exists(a):
                continue
            m = klass(a, conf)
        elif a.startswith('@'):
            # there can actually multiple files per line
            with open(a[1:]) as listing:
                for line in listing:
                    for f in line.strip().split():
                        if not os.path.exists(f):
                            continue
                        m = klass(f, conf)

    if output and m:
        print(m.dirdeps())
        print(m.src_dirdeps('\nsrc:'))

        dpdeps = getv(conf, 'DPDEPS')
        if dpdeps:
            # text mode: file_depends() writes with print(), which
            # cannot write str to a binary file on Python 3
            with open(dpdeps, 'w') as out:
                m.file_depends(out)

    return m
747
if __name__ == '__main__':
    # run as a script: report any failure, then let it propagate
    try:
        main(sys.argv)
    except BaseException as err:
        # yes, this goes to stdout
        print("ERROR: ", err)
        raise
755
756