xref: /freebsd/contrib/bmake/mk/meta2deps.py (revision 273c26a3c3bea87a241d6879abd4f991db180bf0)
1#!/usr/bin/env python
2
3from __future__ import print_function
4
"""
This script parses each "meta" file and extracts the
information needed to deduce build and src dependencies.

It works much the same as the original shell script, but is
*much* more efficient.

The parsing work is handled by the class MetaFile.
We only pay attention to a subset of the information in the
"meta" files.  Specifically:

'CWD'	to initialize our notion.

'C'	to track chdir(2) on a per process basis

'R'	files read are what we really care about.
	directories read, provide a clue to resolving
	subsequent relative paths.  That is if we cannot find
	them relative to 'cwd', we check relative to the last
	dir read.

'W'	files opened for write or read-write,
	for filemon V3 and earlier.

'E'	files executed.

'L'	files linked

'V'	the filemon version, this record is used as a clue
	that we have reached the interesting bit.

"""
37
"""
RCSid:
	$Id: meta2deps.py,v 1.19 2016/04/02 20:45:40 sjg Exp $

	Copyright (c) 2011-2013, Juniper Networks, Inc.
	All rights reserved.

	Redistribution and use in source and binary forms, with or without
	modification, are permitted provided that the following conditions
	are met:
	1. Redistributions of source code must retain the above copyright
	   notice, this list of conditions and the following disclaimer.
	2. Redistributions in binary form must reproduce the above copyright
	   notice, this list of conditions and the following disclaimer in the
	   documentation and/or other materials provided with the distribution.

	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
	"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
	LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
	A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
	OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
	SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
	LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
	DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
	THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
	(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
	OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""
67
68import os, re, sys
69
def getv(dict, key, d=None):
    """Lookup key in dict and return value or the supplied default.

    dict    mapping to search; the name shadows the builtin but is kept
            because it is part of the call signature.
    key     key to look up.
    d       value returned when key is not present (default None).

    A key that is present with a false value (None, 0, '') is returned
    as is, the default is only used for a missing key.
    """
    return dict[key] if key in dict else d
75
def resolve(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Return an absolute path, resolving via cwd or last_dir if needed.

    path        path to resolve.
    cwd         directory that relative paths are tried against.
    last_dir    optional directory tried before cwd for relative paths.
    debug       debug level, above 2 each probe is reported.
    debug_out   open file that receives the debug output.

    Absolute paths, '.' and './x' are answered without looking at the
    file system.  Any other relative path is returned only if it exists
    below last_dir or cwd (tried in that order), otherwise the result
    is None.
    """
    if path.endswith('/.'):
        path = path[0:-2]
        if not path:
            # the input was '/.', that is the root directory; without
            # this it would be looked up as '' below last_dir/cwd
            return '/'
    if path.startswith('/'):
        return path
    if path == '.':
        return cwd
    if path.startswith('./'):
        return cwd + path[1:]
    if last_dir == cwd:
        # no point probing the same directory twice
        last_dir = None
    for d in [last_dir, cwd]:
        if not d:
            continue
        p = '/'.join([d, path])
        if debug > 2:
            print("looking for:", p, end=' ', file=debug_out)
        if os.path.exists(p):
            if debug > 2:
                print("found:", p, file=debug_out)
            return p
        if debug > 2:
            print("nope", file=debug_out)
    return None
105
def abspath(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Return an absolute path, resolving via cwd or last_dir if needed.
    this gets called a lot, so we try to avoid calling realpath
    until we know we have something.

    path        path to make absolute.
    cwd         directory that relative paths are tried against.
    last_dir    optional directory tried before cwd.
    debug       debug level passed on to resolve().
    debug_out   open file passed on to resolve().

    If resolve() cannot find the path, the path is used as given.
    os.path.realpath() is only applied when the result has no '/',
    contains './' after its first character, ends in '/..' or is a
    symlink; otherwise the resolved path is returned unchanged.
    """
    rpath = resolve(path, cwd, last_dir, debug, debug_out)
    if rpath:
        path = rpath
    if (path.find('/') < 0 or
        path.find('./') > 0 or
        path.endswith('/..') or
        os.path.islink(path)):
        return os.path.realpath(path)
    return path
121
def sort_unique(list, cmp=None, key=None, reverse=False):
    """Sort list in place and return a new list without duplicates.

    list        the list to sort; it is sorted in place, as before.
    cmp         optional old style comparison function f(a, b) that
                returns a negative, zero or positive number.
    key         optional function that extracts the sort key.
    reverse     sort in descending order if true.

    Only keyword arguments are passed to list.sort() so that this
    works with python 3 as well as python 2.7; cmp is adapted with
    functools.cmp_to_key and, as in python 2, is applied to the result
    of key when both are given.

    Returns a new list with equal neighbours collapsed to one entry.
    """
    sort_key = key
    if cmp is not None:
        from functools import cmp_to_key
        cmp_key = cmp_to_key(cmp)
        if key is None:
            sort_key = cmp_key
        else:
            sort_key = lambda e: cmp_key(key(e))
    list.sort(key=sort_key, reverse=reverse)
    nl = []
    for e in list:
        # compare with the last entry kept, this also means no sentinel
        # value can be mistaken for real data
        if nl and e == nl[-1]:
            continue
        nl.append(e)
    return nl
131
def add_trims(x):
    """Return the four suffix forms of x: '/x/', '/x', 'x/' and 'x'.

    The longest form comes first so that a caller trying them in order
    matches the most specific suffix first.
    """
    return ['/' + x + '/',
            '/' + x,
            x + '/',
            x]
137
class MetaFile:
    """class to parse meta files generated by bmake.

    The containers defined at class level are shared by all instances
    until reset() is called: main() creates one instance per meta file
    and reports the dependencies via the last one, so the results have
    to accumulate across instances.
    """

    conf = None
    dirdep_re = None
    host_target = None
    srctops = []
    objroots = []
    excludes = []
    seen = {}
    obj_deps = []
    src_deps = []
    file_deps = []

    def __init__(self, name, conf=None):
        """if name is set we will parse it now.
        conf can have the following keys:

        SRCTOPS list of tops of the src tree(s).

        CURDIR  the src directory 'bmake' was run from.

        RELDIR  the relative path from SRCTOP to CURDIR

        MACHINE the machine we built for.
                set to 'none' if we are not cross-building.
                More specifically if machine cannot be deduced from objdirs.

        MACHINE_ARCH
                stored as self.machine_arch.

        TARGET_SPEC
                Sometimes MACHINE isn't enough.

        HOST_TARGET
                when we build for the pseudo machine 'host'
                the object tree uses HOST_TARGET rather than MACHINE.

        OBJROOTS a list of the common prefix for all obj dirs it might
                end in '/' or '-'.

        DPDEPS  names an optional file to which per file dependencies
                will be appended.
                For example if 'some/path/foo.h' is read from SRCTOP
                then 'DPDEPS_some/path/foo.h +=' "RELDIR" is output.
                This can allow 'bmake' to learn all the dirs within
                the tree that depend on 'foo.h'

        EXCLUDES
                A list of paths to ignore.
                ccache(1) can otherwise be trouble.

        debug   desired debug level

        debug_out open file to send debug output to (sys.stderr)

        """

        if conf is None:
            conf = {}

        self.name = name
        self.debug = getv(conf, 'debug', 0)
        self.debug_out = getv(conf, 'debug_out', sys.stderr)

        self.machine = getv(conf, 'MACHINE', '')
        self.machine_arch = getv(conf, 'MACHINE_ARCH', '')
        self.target_spec = getv(conf, 'TARGET_SPEC', '')
        self.curdir = getv(conf, 'CURDIR')
        self.reldir = getv(conf, 'RELDIR')
        self.dpdeps = getv(conf, 'DPDEPS')
        self.line = 0

        if not self.conf:
            # some of the steps below we want to do only once
            self.conf = conf
            self.host_target = getv(conf, 'HOST_TARGET')
            for srctop in getv(conf, 'SRCTOPS', []):
                if not srctop:
                    continue            # nothing useful to record
                if not srctop.endswith('/'):
                    srctop += '/'
                if srctop not in self.srctops:
                    self.srctops.append(srctop)
                # also remember the tree by its real (symlink free) name
                _srctop = os.path.realpath(srctop)
                if not _srctop.endswith('/'):
                    _srctop += '/'
                if _srctop not in self.srctops:
                    self.srctops.append(_srctop)

            # an empty or missing name must not produce trims: an empty
            # suffix matches everything and would wipe out the objroot
            trim_list = []
            if self.machine:
                trim_list += add_trims(self.machine)
            if self.machine == 'host' and self.host_target:
                trim_list += add_trims(self.host_target)
            if self.target_spec:
                trim_list += add_trims(self.target_spec)

            for objroot in getv(conf, 'OBJROOTS', []):
                for e in trim_list:
                    if objroot.endswith(e):
                        # this is not what we want - fix it
                        objroot = objroot[0:-len(e)]
                        if e.endswith('/'):
                            objroot += '/'
                if not objroot:
                    continue            # nothing left to match against
                if objroot not in self.objroots:
                    self.objroots.append(objroot)
                    _objroot = os.path.realpath(objroot)
                    if objroot.endswith('/'):
                        _objroot += '/'
                    if _objroot not in self.objroots:
                        self.objroots.append(_objroot)

            # we want the longest match
            self.srctops.sort(reverse=True)
            self.objroots.sort(reverse=True)

            self.excludes = getv(conf, 'EXCLUDES', [])

            if self.debug:
                print("host_target=", self.host_target, file=self.debug_out)
                print("srctops=", self.srctops, file=self.debug_out)
                print("objroots=", self.objroots, file=self.debug_out)
                print("excludes=", self.excludes, file=self.debug_out)

            self.dirdep_re = re.compile(r'([^/]+)/(.+)')

        if self.dpdeps and not self.reldir:
            if self.debug:
                print("need reldir:", end=' ', file=self.debug_out)
            if self.curdir:
                srctop = self.find_top(self.curdir, self.srctops)
                if srctop:
                    self.reldir = self.curdir.replace(srctop,'')
                    if self.debug:
                        print(self.reldir, file=self.debug_out)
            if not self.reldir:
                self.dpdeps = None      # we cannot do it?

        self.cwd = os.getcwd()          # make sure this is initialized
        self.last_dir = self.cwd

        if name:
            self.try_parse()

    def reset(self):
        """reset state if we are being passed meta files from multiple directories."""
        self.seen = {}
        self.obj_deps = []
        self.src_deps = []
        self.file_deps = []

    def dirdeps(self, sep='\n'):
        """return DIRDEPS

        The entries of obj_deps joined by sep, prefixed with sep
        stripped of surrounding white space.
        """
        return sep.strip() + sep.join(self.obj_deps)

    def src_dirdeps(self, sep='\n'):
        """return SRC_DIRDEPS

        The entries of src_deps joined by sep, prefixed with sep
        stripped of surrounding white space.
        """
        return sep.strip() + sep.join(self.src_deps)

    def file_depends(self, out=None):
        """Append DPDEPS_${file} += ${RELDIR}
        for each file we saw, to the output file.

        out     open text file to print to (None means sys.stdout).

        Nothing is written if reldir is not known.
        """
        if not self.reldir:
            return None
        for f in sort_unique(self.file_deps):
            print('DPDEPS_%s += %s' % (f, self.reldir), file=out)

    def seenit(self, dir):
        """remember that we have seen dir."""
        self.seen[dir] = 1

    def add(self, list, data, clue=''):
        """add data to list if it isn't already there.

        clue is only used to label the debug output.
        """
        if data not in list:
            list.append(data)
            if self.debug:
                print("%s: %sAdd: %s" % (self.name, clue, data), file=self.debug_out)

    def find_top(self, path, list):
        """the logical tree may be split across multiple trees

        Return the first entry of list that path starts with, or None.
        """
        for top in list:
            if path.startswith(top):
                if self.debug > 2:
                    print("found in", top, file=self.debug_out)
                return top
        return None

    def find_obj(self, objroot, dir, path, input):
        """return path within objroot, taking care of .dirdep files

        objroot the entry of objroots that dir was found under.
        dir     directory that holds path.
        path    the file that was referenced.
        input   the raw path from the meta file, remembered as seen
                when there is no .dirdep file.

        Returns the directory to depend on, or None.
        """
        ddep = None
        for ddepf in [path + '.dirdep', dir + '/.dirdep']:
            if not ddep and os.path.exists(ddepf):
                with open(ddepf, 'r') as fh:
                    ddep = fh.readline().strip('# \n')
                if self.debug > 1:
                    print("found %s: %s\n" % (ddepf, ddep), file=self.debug_out)
                # drop a trailing machine/target_spec plus the character
                # in front of it; an empty machine must not match or we
                # would chop the last character of every value
                if self.machine and ddep.endswith(self.machine):
                    ddep = ddep[0:-(1+len(self.machine))]
                elif self.target_spec and ddep.endswith(self.target_spec):
                    ddep = ddep[0:-(1+len(self.target_spec))]

        if not ddep:
            # no .dirdeps, so remember that we've seen the raw input
            self.seenit(input)
            self.seenit(dir)
            if self.machine == 'none':
                if dir.startswith(objroot):
                    return dir.replace(objroot,'')
                return None
            m = self.dirdep_re.match(dir.replace(objroot,''))
            if m:
                ddep = m.group(2)
                dmachine = m.group(1)
                if dmachine != self.machine:
                    if not (self.machine == 'host' and
                            dmachine == self.host_target):
                        if self.debug > 2:
                            print("adding .%s to %s" % (dmachine, ddep), file=self.debug_out)
                        ddep += '.' + dmachine

        return ddep

    def try_parse(self, name=None, file=None):
        """give file and line number causing exception"""
        try:
            self.parse(name, file)
        except:
            # give a useful clue
            print('{}:{}: '.format(self.name, self.line), end=' ', file=sys.stderr)
            raise

    def parse(self, name=None, file=None):
        """A meta file looks like:

        # Meta data file "path"
        CMD "command-line"
        CWD "cwd"
        TARGET "target"
        -- command output --
        -- filemon acquired metadata --
        # buildmon version 3
        V 3
        C "pid" "cwd"
        E "pid" "path"
        F "pid" "child"
        R "pid" "path"
        W "pid" "path"
        X "pid" "status"
        D "pid" "path"
        L "pid" "src" "target"
        M "pid" "old" "new"
        S "pid" "path"
        # Bye bye

        We go to some effort to avoid processing a dependency more than once.
        Of the above record types only C,E,F,L,R,V and W are of interest.

        name    if set, replaces self.name.
        file    if set, an already open file (or any iterable of lines)
                to read instead of opening self.name; it is left open.
        """

        version = 0                     # unknown
        if name:
            self.name = name
        if file:
            f = file
            cwd = self.last_dir = self.cwd
        else:
            f = open(self.name, 'r')
            cwd = self.cwd              # make sure this is initialized

        try:
            skip = True
            pid_cwd = {}
            pid_last_dir = {}
            last_pid = 0

            self.line = 0
            if self.curdir:
                self.seenit(self.curdir)    # we ignore this

            interesting = 'CEFLRV'
            for line in f:
                self.line += 1
                # ignore anything we don't care about
                if not line[0] in interesting:
                    continue
                if self.debug > 2:
                    print("input:", line, end=' ', file=self.debug_out)
                w = line.split()

                if skip:
                    if w[0] == 'V':
                        skip = False
                        # not used beyond checking that it is a number
                        version = int(w[1])
                        # disabled:
                        # if version < 4:
                        #     # we cannot ignore 'W' records
                        #     # as they may be 'rw'
                        #     interesting += 'W'
                    elif w[0] == 'CWD':
                        self.cwd = cwd = self.last_dir = w[1]
                        self.seenit(cwd)    # ignore this
                        if self.debug:
                            print("%s: CWD=%s" % (self.name, cwd), file=self.debug_out)
                    continue

                pid = int(w[1])
                if pid != last_pid:
                    # save the state of the previous process and
                    # pick up the state of this one
                    if last_pid:
                        pid_cwd[last_pid] = cwd
                        pid_last_dir[last_pid] = self.last_dir
                    cwd = pid_cwd.get(pid, self.cwd)
                    self.last_dir = pid_last_dir.get(pid, self.cwd)
                    last_pid = pid

                # process operations
                if w[0] == 'F':
                    # the child starts out in the cwd of its parent
                    npid = int(w[2])
                    pid_cwd[npid] = cwd
                    pid_last_dir[npid] = cwd
                    last_pid = npid
                    continue
                elif w[0] == 'C':
                    cwd = abspath(w[2], cwd, None, self.debug, self.debug_out)
                    if cwd.endswith('/.'):
                        cwd = cwd[0:-2]
                    self.last_dir = cwd
                    if self.debug > 1:
                        print("cwd=", cwd, file=self.debug_out)
                    continue

                if w[2] in self.seen:
                    if self.debug > 2:
                        print("seen:", w[2], file=self.debug_out)
                    continue
                # file operations
                if w[0] in 'ML':
                    # these are special, treat src as read and
                    # target as write.
                    # w[1] is the pid, src and target follow it.
                    self.parse_path(w[2].strip("'"), cwd, 'R', w)
                    self.parse_path(w[3].strip("'"), cwd, 'W', w)
                    continue
                elif w[0] in 'ERWS':
                    path = w[2]
                    self.parse_path(path, cwd, w[0], w)
        finally:
            # only close what we opened ourselves
            if not file:
                f.close()

    def parse_path(self, path, cwd, op=None, w=None):
        """look at a path for the op specified

        path    the path from the record.
        cwd     current directory of the process that used path.
        op      the operation, taken from w[0] if not given.
        w       the split record; w[2] is what gets remembered as seen.
        """

        if w is None:
            w = []
        if not op:
            op = w[0]
        # what parse() checks against self.seen
        raw = w[2] if len(w) > 2 else path

        # we are never interested in .dirdep files as dependencies
        if path.endswith('.dirdep'):
            return
        for p in self.excludes:
            if p and path.startswith(p):
                if self.debug > 2:
                    print("exclude:", p, path, file=self.debug_out)
                return
        # we don't want to resolve the last component if it is
        # a symlink
        path = resolve(path, cwd, self.last_dir, self.debug, self.debug_out)
        if not path:
            return
        dirname, base = os.path.split(path)
        if dirname in self.seen:
            if self.debug > 2:
                print("seen:", dirname, file=self.debug_out)
            return
        # we can have a path in an objdir which is a link
        # to the src dir, we may need to add dependencies for each
        rdir = dirname
        dirname = abspath(dirname, cwd, self.last_dir, self.debug, self.debug_out)
        if rdir == dirname or rdir.find('./') > 0:
            rdir = None
        # now put path back together
        path = '/'.join([dirname, base])
        if self.debug > 1:
            print("raw=%s rdir=%s dir=%s path=%s" % (raw, rdir, dirname, path), file=self.debug_out)
        if op in 'RWS':
            if path in [self.last_dir, cwd, self.cwd, self.curdir]:
                if self.debug > 1:
                    print("skipping:", path, file=self.debug_out)
                return
            if os.path.isdir(path):
                if op in 'RW':
                    self.last_dir = path
                if self.debug > 1:
                    print("ldir=", self.last_dir, file=self.debug_out)
                return

        if op in 'ERW':
            # finally, we get down to it
            if dirname == self.cwd or dirname == self.curdir:
                return
            srctop = self.find_top(path, self.srctops)
            if srctop:
                if self.dpdeps:
                    self.add(self.file_deps, path.replace(srctop,''), 'file')
                self.add(self.src_deps, dirname.replace(srctop,''), 'src')
                self.seenit(raw)
                self.seenit(dirname)
                if rdir and not rdir.startswith(srctop):
                    dirname = rdir      # for below
                    rdir = None
                else:
                    return

            objroot = None
            objdir = None
            for objdir in [dirname, rdir]:
                if not objdir:
                    continue
                objroot = self.find_top(objdir, self.objroots)
                if objroot:
                    break
            if objroot:
                ddep = self.find_obj(objroot, objdir, path, raw)
                if ddep:
                    self.add(self.obj_deps, ddep, 'obj')
            else:
                # don't waste time looking again
                # NOTE(review): objdir is the last candidate tried, which
                # is None when there is no rdir; kept as is for
                # compatibility - confirm whether dirname was intended.
                self.seenit(raw)
                self.seenit(objdir)
551
552
def main(argv, klass=MetaFile, xopts='', xoptf=None):
    """Simple driver for class MetaFile.

    Usage:
        script [options] [key=value ...] "meta" ...

    Options and key=value pairs contribute to the
    dictionary passed to MetaFile.

    -S "SRCTOP"
                add "SRCTOP" to the "SRCTOPS" list.

    -C "CURDIR"

    -O "OBJROOT"
                add "OBJROOT" to the "OBJROOTS" list.

    -R "RELDIR"

    -m "MACHINE"

    -a "MACHINE_ARCH"

    -H "HOST_TARGET"

    -T "TARGET_SPEC"

    -D "DPDEPS"

    -X "EXCLUDE"
                add "EXCLUDE" to the "EXCLUDES" list.

    -q  do not print the results

    -d  bumps debug level

    The environment variables MACHINE, MACHINE_ARCH, SB_SRC and
    SB_OBJROOT provide initial values when they are set.

    argv    the argument list, argv[0] is ignored.
    klass   the class to instantiate for each meta file.
    xopts   extra option letters, in getopt form.
    xoptf   called as xoptf(option, value, conf) for any option not
            handled here.

    An argument starting with '@' names a file that lists meta files.
    Returns the last klass instance created, or None.
    """
    import getopt

    # import Psyco if we can
    # it can speed things up quite a bit
    have_psyco = 0
    try:
        import psyco
        psyco.full()
        have_psyco = 1
    except Exception:
        pass

    conf = {
        'SRCTOPS': [],
        'OBJROOTS': [],
        'EXCLUDES': [],
        }

    # each variable is optional, a missing one must not stop us
    # from looking at the others
    for var in ('MACHINE', 'MACHINE_ARCH'):
        val = os.environ.get(var)
        if val:
            conf[var] = val
    for var, key in (('SB_SRC', 'SRCTOPS'), ('SB_OBJROOT', 'OBJROOTS')):
        val = os.environ.get(var)
        if val:
            conf[key].append(val)

    debug = 0
    output = True

    opts, args = getopt.getopt(argv[1:], 'a:dS:C:O:R:m:D:H:qT:X:' + xopts)
    for o, a in opts:
        if o == '-a':
            conf['MACHINE_ARCH'] = a
        elif o == '-d':
            debug += 1
        elif o == '-q':
            output = False
        elif o == '-H':
            conf['HOST_TARGET'] = a
        elif o == '-S':
            if a not in conf['SRCTOPS']:
                conf['SRCTOPS'].append(a)
        elif o == '-C':
            conf['CURDIR'] = a
        elif o == '-O':
            if a not in conf['OBJROOTS']:
                conf['OBJROOTS'].append(a)
        elif o == '-R':
            conf['RELDIR'] = a
        elif o == '-D':
            conf['DPDEPS'] = a
        elif o == '-m':
            conf['MACHINE'] = a
        elif o == '-T':
            conf['TARGET_SPEC'] = a
        elif o == '-X':
            if a not in conf['EXCLUDES']:
                conf['EXCLUDES'].append(a)
        elif xoptf:
            xoptf(o, a, conf)

    conf['debug'] = debug

    # get any var=val assignments, they end at the first argument
    # that is not one
    neaten = 0
    for a in args:
        if a.find('=') <= 0:
            break
        # split on the first '=' only, the value may contain more
        k, v = a.split('=', 1)
        if k in ['SRCTOP','OBJROOT','SRCTOPS','OBJROOTS']:
            if k == 'SRCTOP':
                k = 'SRCTOPS'
            elif k == 'OBJROOT':
                k = 'OBJROOTS'
            if v not in conf[k]:
                conf[k].append(v)
        else:
            conf[k] = v
        neaten += 1

    args = args[neaten:]

    debug_out = getv(conf, 'debug_out', sys.stderr)

    if debug:
        print("config:", file=debug_out)
        print("psyco=", have_psyco, file=debug_out)
        for k, v in list(conf.items()):
            print("%s=%s" % (k,v), file=debug_out)

    m = None
    for a in args:
        if a.endswith('.meta'):
            if not os.path.exists(a):
                continue
            m = klass(a, conf)
        elif a.startswith('@'):
            # there can actually multiple files per line
            with open(a[1:]) as listf:
                for line in listf:
                    for f in line.strip().split():
                        if not os.path.exists(f):
                            continue
                        m = klass(f, conf)

    if output and m:
        print(m.dirdeps())

        print(m.src_dirdeps('\nsrc:'))

        dpdeps = getv(conf, 'DPDEPS')
        if dpdeps:
            # text mode: print() cannot write str to a binary file
            # with python 3.  The file is still truncated, as before.
            with open(dpdeps, 'w') as out:
                m.file_depends(out)

    return m
705
# run main() when used as a script; any exception is reported on
# stdout as well before it is allowed to propagate
if __name__ == '__main__':
    try:
        main(sys.argv)
    except:
        # yes, this goes to stdout
        print("ERROR: ", sys.exc_info()[1])
        raise
713
714