xref: /freebsd/share/mk/meta2deps.py (revision 7750ad47a9a7dbc83f87158464170c8640723293)
1#!/usr/bin/env python
2
3"""
4This script parses each "meta" file and extracts the
5information needed to deduce build and src dependencies.
6
7It works much the same as the original shell script, but is
8*much* more efficient.
9
10The parsing work is handled by the class MetaFile.
11We only pay attention to a subset of the information in the
12"meta" files.  Specifically:
13
14'CWD'	to initialize our notion.
15
16'C'	to track chdir(2) on a per process basis
17
18'R'	files read are what we really care about.
19	directories read, provide a clue to resolving
20	subsequent relative paths.  That is if we cannot find
21	them relative to 'cwd', we check relative to the last
22	dir read.
23
24'W'	files opened for write or read-write,
25	for filemon V3 and earlier.
26
27'E'	files executed.
28
29'L'	files linked
30
31'V'	the filemon version, this record is used as a clue
32	that we have reached the interesting bit.
33
34"""
35
36"""
37RCSid:
38	$Id: meta2deps.py,v 1.5 2011/11/14 00:18:42 sjg Exp $
39
40	Copyright (c) 2011, Juniper Networks, Inc.
41
42	Redistribution and use in source and binary forms, with or without
43	modification, are permitted provided that the following conditions
44	are met:
45	1. Redistributions of source code must retain the above copyright
46	   notice, this list of conditions and the following disclaimer.
47	2. Redistributions in binary form must reproduce the above copyright
48	   notice, this list of conditions and the following disclaimer in the
49	   documentation and/or other materials provided with the distribution.
50
51	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
52	"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
53	LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
54	A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
55	OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
56	SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
57	LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
58	DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
59	THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
60	(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
61	OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
62
63"""
64
65import os, re, sys
66
def getv(dict, key, d=None):
    """Return dict[key] when key is present, otherwise the default d."""
    if key not in dict:
        return d
    return dict[key]
72
def resolve(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Return an absolute path, resolving via cwd or last_dir if needed.

    path      the path as recorded; a trailing '/.' is dropped.
    cwd       directory that relative paths are normally relative to.
    last_dir  optional directory to try first for plain relative
              paths (ignored when it equals cwd).
    debug     debug level; above 2 each candidate is reported.
    debug_out open file that receives the debug output.

    Absolute paths, '.' and './...' are answered without touching the
    file system.  Any other relative path is only returned if it
    exists below last_dir or cwd.  Returns None when path is empty or
    cannot be found.
    """
    if path.endswith('/.'):
        # '/.' on its own names the root directory, do not reduce it
        # to an empty string
        path = path[0:-2] or '/'
    if not path:
        # nothing to resolve (and path[0] below would raise)
        return None
    if path[0] == '/':
        return path
    if path == '.':
        return cwd
    if path.startswith('./'):
        return cwd + path[1:]
    if last_dir == cwd:
        # no point checking the same directory twice
        last_dir = None
    for d in [last_dir, cwd]:
        if not d:
            continue
        p = '/'.join([d,path])
        if debug > 2:
            print >> debug_out, "looking for:", p,
        if not os.path.exists(p):
            if debug > 2:
                print >> debug_out, "nope"
            continue
        if debug > 2:
            print >> debug_out, "found:", p
        return p
    return None
102
def abspath(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Resolve path (see resolve()) and canonicalize it only when needed.

    This is called a lot, so os.path.realpath() is reserved for
    results that contain './' past the first character, end in '/..'
    or are symbolic links.  Whatever resolve() returned is passed
    through untouched otherwise, including a false value when the
    path could not be resolved.
    """
    resolved = resolve(path, cwd, last_dir, debug, debug_out)
    if not resolved:
        return resolved
    # cheap string tests first, the lstat() behind islink() last
    if resolved.find('./') > 0:
        return os.path.realpath(resolved)
    if resolved.endswith('/..'):
        return os.path.realpath(resolved)
    if os.path.islink(resolved):
        return os.path.realpath(resolved)
    return resolved
115
def sort_unique(list, cmp=None, key=None, reverse=False):
    """
    Sort list in place and return a new list without duplicates.

    cmp, key and reverse are handed to list.sort().  Elements that
    compare equal to their predecessor in the sorted order are left
    out of the returned list; the argument itself keeps them.
    """
    if cmp is None:
        # keyword form, so that no positional cmp slot is required
        list.sort(key=key, reverse=reverse)
    else:
        list.sort(cmp=cmp, key=key, reverse=reverse)
    nl = []
    for e in list:
        # equal elements are adjacent after sorting, so comparing with
        # the last one kept is enough
        if nl and e == nl[-1]:
            continue
        nl.append(e)
    return nl
125
class MetaFile:
    """class to parse meta files generated by bmake.

    The containers below are class attributes.  Until reset() rebinds
    them on an instance they are shared by every instance, so the
    dependencies found by one MetaFile are visible through the next.
    """

    conf = None
    dirdep_re = None
    host_target = None
    srctops = []
    objroots = []

    seen = {}
    obj_deps = []
    src_deps = []
    file_deps = []

    def __init__(self, name, conf=None):
        """if name is set we will parse it now.
        conf can have the following keys:

        SRCTOPS list of tops of the src tree(s).

        CURDIR  the src directory 'bmake' was run from.

        RELDIR  the relative path from SRCTOP to CURDIR

        MACHINE the machine we built for.
                set to 'none' if we are not cross-building.

        HOST_TARGET
                when we build for the pseudo machine 'host'
                the object tree uses HOST_TARGET rather than MACHINE.

        OBJROOTS a list of the common prefix for all obj dirs it might
                end in '/' or '-'.

        DPDEPS  names an optional file to which per file dependencies
                are written (see file_depends).
                For example if 'some/path/foo.h' is read from SRCTOP
                then 'DPDEPS_some/path/foo.h +=' "RELDIR" is output.
                This can allow 'bmake' to learn all the dirs within
                the tree that depend on 'foo.h'

        debug   desired debug level

        debug_out open file to send debug output to (sys.stderr)

        """

        if conf is None:
            # a fresh dict per call rather than one shared default
            conf = {}

        self.name = name
        self.debug = getv(conf, 'debug', 0)
        self.debug_out = getv(conf, 'debug_out', sys.stderr)

        if not self.conf:
            # some of the steps below we want to do only once
            # NOTE: the assignment below binds an instance attribute,
            # the class level 'conf' stays None, so this branch is
            # taken for every new instance.  The membership tests keep
            # the shared lists free of duplicates.
            self.conf = conf
            self.host_target = getv(conf, 'HOST_TARGET')
            for srctop in getv(conf, 'SRCTOPS', []):
                if not srctop:
                    continue            # nothing usable
                if not srctop.endswith('/'):
                    srctop += '/'
                if srctop not in self.srctops:
                    self.srctops.append(srctop)

            for objroot in getv(conf, 'OBJROOTS', []):
                if not objroot:
                    continue            # nothing usable
                if objroot not in self.objroots:
                    self.objroots.append(objroot)
                    # a directory style objroot is also recorded
                    # with symlinks resolved
                    _objroot = os.path.realpath(objroot)
                    if objroot[-1] == '/':
                        _objroot += '/'
                        if _objroot not in self.objroots:
                            self.objroots.append(_objroot)

            if self.debug:
                print >> self.debug_out, "host_target=", self.host_target
                print >> self.debug_out, "srctops=", self.srctops
                print >> self.debug_out, "objroots=", self.objroots

            # splits 'machine/rel/dir' into machine and rel/dir
            self.dirdep_re = re.compile(r'([^/]+)/(.+)')

        self.curdir = getv(conf, 'CURDIR')
        self.machine = getv(conf, 'MACHINE', '')
        self.reldir = getv(conf, 'RELDIR')
        self.dpdeps = getv(conf, 'DPDEPS')
        if self.dpdeps and not self.reldir:
            if self.debug:
                print >> self.debug_out, "need reldir:",
            if self.curdir:
                srctop = self.find_top(self.curdir, self.srctops)
                if srctop:
                    self.reldir = self._strip_top(self.curdir, srctop)
                    if self.debug:
                        print >> self.debug_out, self.reldir
            if not self.reldir:
                self.dpdeps = None      # we cannot do it?

        if name:
            self.parse()

    def reset(self):
        """reset state if we are being passed meta files from multiple directories."""
        self.seen = {}
        self.obj_deps = []
        self.src_deps = []
        self.file_deps = []

    def dirdeps(self, sep='\n'):
        """return DIRDEPS, each entry introduced by sep (the first by
        sep stripped of surrounding whitespace)"""
        return sep.strip() + sep.join(self.obj_deps)

    def src_dirdeps(self, sep='\n'):
        """return SRC_DIRDEPS, formatted like dirdeps()"""
        return sep.strip() + sep.join(self.src_deps)

    def file_depends(self, out=None):
        """Append DPDEPS_${file} += ${RELDIR}
        for each file we saw, to the output file.
        Does nothing and returns None if RELDIR is not known."""
        if not self.reldir:
            return None
        for f in sort_unique(self.file_deps):
            print >> out, 'DPDEPS_%s += %s' % (f, self.reldir)

    def seenit(self, dir):
        """remember that we have seen dir."""
        self.seen[dir] = 1

    def add(self, list, data, clue=''):
        """add data to list if it isn't already there.
        clue only labels the debug output."""
        if data not in list:
            list.append(data)
            if self.debug:
                print >> self.debug_out, "%s: %sAdd: %s" % (self.name, clue, data)

    def find_top(self, path, list):
        """the logical tree may be split across multiple trees,
        return the first entry of list that path starts with, or None."""
        for top in list:
            if path.startswith(top):
                if self.debug > 2:
                    print >> self.debug_out, "found in", top
                return top
        return None

    def _strip_top(self, path, top):
        """return path without its leading top, only the prefix is
        removed; path is returned as is if it does not start with top."""
        if path.startswith(top):
            return path[len(top):]
        return path

    def find_obj(self, objroot, dir, path, input):
        """return path within objroot, taking care of .dirdep files

        objroot the entry of objroots that dir was found under.
        dir     the directory of the file being looked at.
        path    the resolved path of that file.
        input   the raw path from the meta file.

        A dependency named by 'path.dirdep' or 'dir/.dirdep' wins,
        otherwise it is derived from dir relative to objroot.
        Returns None if nothing can be deduced."""
        ddep = None
        for ddepf in [path + '.dirdep', dir + '/.dirdep']:
            if not ddep and os.path.exists(ddepf):
                ddepfile = open(ddepf, 'r')
                try:
                    ddep = ddepfile.readline().strip('# \n')
                finally:
                    ddepfile.close()
                if self.debug > 1:
                    print >> self.debug_out, "found %s: %s\n" % (ddepf, ddep)
                # drop the machine suffix and the character before it;
                # every string ends with '', so an unset machine
                # must not chop anything
                if self.machine and ddep.endswith(self.machine):
                    ddep = ddep[0:-(1+len(self.machine))]

        if not ddep:
            # no .dirdeps, so remember that we've seen the raw input
            self.seenit(input)
            self.seenit(dir)
            if self.machine == 'none':
                if dir.startswith(objroot):
                    return self._strip_top(dir, objroot)
                return None
            m = self.dirdep_re.match(self._strip_top(dir, objroot))
            if m:
                ddep = m.group(2)
                dmachine = m.group(1)
                if dmachine != self.machine:
                    # built for another machine, qualify the dependency
                    # unless it is just the host's own objdir
                    if not (self.machine == 'host' and
                            dmachine == self.host_target):
                        if self.debug > 2:
                            print >> self.debug_out, "adding .%s to %s" % (dmachine, ddep)
                        ddep += '.' + dmachine

        return ddep

    def parse(self, name=None, file=None):
        """A meta file looks like:

        # Meta data file "path"
        CMD "command-line"
        CWD "cwd"
        TARGET "target"
        -- command output --
        -- filemon acquired metadata --
        # buildmon version 3
        V 3
        C "pid" "cwd"
        E "pid" "path"
        F "pid" "child"
        R "pid" "path"
        W "pid" "path"
        X "pid" "status"
        D "pid" "path"
        L "pid" "src" "target"
        M "pid" "old" "new"
        S "pid" "path"
        # Bye bye

        We go to some effort to avoid processing a dependency more than once.
        Only lines starting with one of C,E,F,L,R and V are looked at.

        name    if set, replaces self.name as the file to read.
        file    if set, an already open file to read instead; it is
                not closed and self.cwd from an earlier parse is the
                starting directory.

        Results are added to src_deps, obj_deps and file_deps.
        """

        version = 0                     # unknown
        if name:
            self.name = name
        if file:
            f = file
            cwd = last_dir = self.cwd
        else:
            f = open(self.name, 'r')
        try:
            skip = True
            pid_cwd = {}
            pid_last_dir = {}
            last_pid = 0

            if self.curdir:
                self.seenit(self.curdir)    # we ignore this

            interesting = 'CEFLRV'
            for line in f:
                # ignore anything we don't care about
                if not line[0] in interesting:
                    continue
                if self.debug > 2:
                    print >> self.debug_out, "input:", line,
                w = line.split()

                if skip:
                    # nothing but CWD matters until the V record
                    if w[0] == 'V':
                        skip = False
                        version = int(w[1])
                        # Disabled: for version < 4 the 'W' records
                        # cannot really be ignored as they may be 'rw',
                        # that would need: interesting += 'W'
                    elif w[0] == 'CWD':
                        self.cwd = cwd = last_dir = w[1]
                        self.seenit(cwd)    # ignore this
                        if self.debug:
                            print >> self.debug_out, "%s: CWD=%s" % (self.name, cwd)
                    continue

                if len(w) < 3:
                    # every record handled below carries a pid and
                    # at least one more field
                    continue

                pid = int(w[1])
                if pid != last_pid:
                    # switching process: save the state of the previous
                    # one and restore what we know about this one
                    if last_pid:
                        pid_cwd[last_pid] = cwd
                        pid_last_dir[last_pid] = last_dir
                    cwd = getv(pid_cwd, pid, self.cwd)
                    last_dir = getv(pid_last_dir, pid, self.cwd)
                    last_pid = pid

                # process operations
                if w[0] == 'F':
                    # the child starts out in the parent's cwd
                    npid = int(w[2])
                    pid_cwd[npid] = cwd
                    pid_last_dir[npid] = cwd
                    last_pid = npid
                    continue
                elif w[0] == 'C':
                    cwd = abspath(w[2], cwd, None, self.debug, self.debug_out)
                    if not cwd:
                        # could not be resolved (it may be gone by
                        # now), carry on with the name as recorded
                        cwd = w[2]
                        if self.debug > 1:
                            print >> self.debug_out, "missing cwd=", cwd
                    if cwd.endswith('/.'):
                        cwd = cwd[0:-2]
                    last_dir = cwd
                    if self.debug > 1:
                        print >> self.debug_out, "cwd=", cwd
                    continue

                if w[2] in self.seen:
                    if self.debug > 2:
                        print >> self.debug_out, "seen:", w[2]
                    continue
                # file operations
                if w[0] in 'ML':
                    path = w[2].strip("'")
                else:
                    path = w[2]
                # we don't want to resolve the last component if it is
                # a symlink
                path = resolve(path, cwd, last_dir, self.debug, self.debug_out)
                if not path:
                    continue
                dir,base = os.path.split(path)
                if dir in self.seen:
                    if self.debug > 2:
                        print >> self.debug_out, "seen:", dir
                    continue
                # we can have a path in an objdir which is a link
                # to the src dir, we may need to add dependencies for each
                rdir = dir
                dir = abspath(dir, cwd, last_dir, self.debug, self.debug_out)
                if rdir == dir or rdir.find('./') > 0:
                    rdir = None
                # now put path back together
                path = '/'.join([dir,base])
                if self.debug > 1:
                    print >> self.debug_out, "raw=%s rdir=%s dir=%s path=%s" % (w[2], rdir, dir, path)
                if w[0] in 'SRWL':
                    if w[0] == 'W' and path.endswith('.dirdep'):
                        continue
                    if path in [last_dir, cwd, self.cwd, self.curdir]:
                        if self.debug > 1:
                            print >> self.debug_out, "skipping:", path
                        continue
                    if os.path.isdir(path):
                        # a directory that was read is a clue for
                        # resolving later relative paths
                        if w[0] in 'RW':
                            last_dir = path
                        if self.debug > 1:
                            print >> self.debug_out, "ldir=", last_dir
                        continue

                if w[0] in 'REWML':
                    # finally, we get down to it
                    if dir == self.cwd or dir == self.curdir:
                        continue
                    srctop = self.find_top(path, self.srctops)
                    if srctop:
                        if self.dpdeps:
                            self.add(self.file_deps, self._strip_top(path, srctop), 'file')
                        self.add(self.src_deps, self._strip_top(dir, srctop), 'src')
                        self.seenit(w[2])
                        self.seenit(dir)
                        if rdir and not rdir.startswith(srctop):
                            dir = rdir      # for below
                            rdir = None
                        else:
                            continue

                    objroot = None
                    # 'dir' is deliberately the loop variable: below it
                    # is the candidate that matched, or the last one
                    # in the list (possibly None) if none did
                    for dir in [dir,rdir]:
                        if not dir:
                            continue
                        objroot = self.find_top(dir, self.objroots)
                        if objroot:
                            break
                    if objroot:
                        ddep = self.find_obj(objroot, dir, path, w[2])
                        if ddep:
                            self.add(self.obj_deps, ddep, 'obj')
                    else:
                        # don't waste time looking again
                        self.seenit(w[2])
                        self.seenit(dir)
        finally:
            # only close what we opened, even if a record blew up
            if not file:
                f.close()
468
469
def main(argv, klass=MetaFile, xopts='', xoptf=None):
    """Simple driver for class MetaFile.

    Usage:
        script [options] [key=value ...] "meta" ...

    Options and key=value pairs contribute to the
    dictionary passed to MetaFile.

    -S "SRCTOP"
                add "SRCTOP" to the "SRCTOPS" list.

    -C "CURDIR"

    -O "OBJROOT"
                add "OBJROOT" to the "OBJROOTS" list.

    -R "RELDIR"

    -m "MACHINE"

    -H "HOST_TARGET"

    -D "DPDEPS"

    -d  bumps debug level

    -q  do not output the results

    The environment variables MACHINE, SB_SRC and SB_OBJROOT, when
    set and not empty, seed MACHINE, SRCTOPS and OBJROOTS.

    argv    the complete argument vector, argv[0] is skipped.
    klass   the class to instantiate for each meta file.
    xopts   additional getopt(3) style option letters to accept.
    xoptf   called as xoptf(option, argument, conf) for any option
            not handled here.

    Returns the instance made for the last meta file,
    or None if there were no meta files.
    """
    import getopt

    # import Psyco if we can
    # it can speed things up quite a bit
    have_psyco = 0
    try:
        import psyco
        psyco.full()
        have_psyco = 1
    except Exception:
        # purely an optimization, carry on without it
        pass

    conf = {
        'SRCTOPS': [],
        'OBJROOTS': [],
        }

    # each variable is looked at on its own, so that a missing one
    # does not hide the others
    machine = os.environ.get('MACHINE')
    if machine:
        conf['MACHINE'] = machine
    srctop = os.environ.get('SB_SRC')
    if srctop:
        conf['SRCTOPS'].append(srctop)
    objroot = os.environ.get('SB_OBJROOT')
    if objroot:
        conf['OBJROOTS'].append(objroot)

    debug = 0
    output = True

    opts, args = getopt.getopt(argv[1:], 'dS:C:O:R:m:D:H:q' + xopts)
    for o, a in opts:
        if o == '-d':
            debug += 1
        elif o == '-q':
            output = False
        elif o == '-H':
            conf['HOST_TARGET'] = a
        elif o == '-S':
            if a not in conf['SRCTOPS']:
                conf['SRCTOPS'].append(a)
        elif o == '-C':
            conf['CURDIR'] = a
        elif o == '-O':
            if a not in conf['OBJROOTS']:
                conf['OBJROOTS'].append(a)
        elif o == '-R':
            conf['RELDIR'] = a
        elif o == '-D':
            conf['DPDEPS'] = a
        elif o == '-m':
            conf['MACHINE'] = a
        elif xoptf:
            xoptf(o, a, conf)

    conf['debug'] = debug

    # get any var=val assignments, they must precede the meta files
    nassign = 0
    for a in args:
        if a.find('=') <= 0:
            break
        # only the first '=' separates, the value may contain more
        k,v = a.split('=', 1)
        if k in ['SRCTOP','OBJROOT','SRCTOPS','OBJROOTS']:
            if k == 'SRCTOP':
                k = 'SRCTOPS'
            elif k == 'OBJROOT':
                k = 'OBJROOTS'
            if v not in conf[k]:
                conf[k].append(v)
        else:
            conf[k] = v
        nassign += 1

    # what is left are the meta files
    args = args[nassign:]

    debug_out = getv(conf, 'debug_out', sys.stderr)

    if debug:
        print >> debug_out, "config:"
        print >> debug_out, "psyco=", have_psyco
        for k,v in conf.items():
            print >> debug_out, "%s=%s" % (k,v)

    m = None
    for a in args:
        m = klass(a, conf)

    # without meta files there is nothing to report
    if output and m is not None:
        sys.stdout.write(m.dirdeps() + '\n')

        sys.stdout.write(m.src_dirdeps('\nsrc:') + '\n')

        dpdeps = getv(conf, 'DPDEPS')
        if dpdeps:
            dpfile = open(dpdeps, 'wb')
            try:
                m.file_depends(dpfile)
            finally:
                dpfile.close()

    return m
598
if __name__ == '__main__':
    # Run the driver only when executed as a script, not when imported.
    try:
        main(sys.argv)
    except:
        # Report the exception value, then re-raise it unchanged.
        # yes, this goes to stdout
        print "ERROR: ", sys.exc_info()[1]
        raise
606
607