xref: /freebsd/share/mk/meta2deps.py (revision f5f7c05209ca2c3748fd8b27c5e80ffad49120eb)
1#!/usr/bin/env python
2
3"""
4This script parses each "meta" file and extracts the
5information needed to deduce build and src dependencies.
6
7It works much the same as the original shell script, but is
8*much* more efficient.
9
10The parsing work is handled by the class MetaFile.
11We only pay attention to a subset of the information in the
12"meta" files.  Specifically:
13
'CWD'	to initialize our notion of the current working directory.
15
16'C'	to track chdir(2) on a per process basis
17
18'R'	files read are what we really care about.
19	directories read, provide a clue to resolving
20	subsequent relative paths.  That is if we cannot find
21	them relative to 'cwd', we check relative to the last
22	dir read.
23
24'W'	files opened for write or read-write,
25	for filemon V3 and earlier.
26
27'E'	files executed.
28
29'L'	files linked
30
31'V'	the filemon version, this record is used as a clue
32	that we have reached the interesting bit.
33
34"""
35
36"""
37RCSid:
38	$Id: meta2deps.py,v 1.8 2013/02/10 19:21:46 sjg Exp $
39
40	Copyright (c) 2011-2013, Juniper Networks, Inc.
41	All rights reserved.
42
43	Redistribution and use in source and binary forms, with or without
44	modification, are permitted provided that the following conditions
45	are met:
46	1. Redistributions of source code must retain the above copyright
47	   notice, this list of conditions and the following disclaimer.
48	2. Redistributions in binary form must reproduce the above copyright
49	   notice, this list of conditions and the following disclaimer in the
50	   documentation and/or other materials provided with the distribution.
51
52	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
53	"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
54	LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
55	A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
56	OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
57	SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
58	LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
59	DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
60	THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
61	(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
62	OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
63
64"""
65
66import os, re, sys
67
def getv(dict, key, d=None):
    """Return dict[key] when key is present, otherwise the fallback d."""
    return dict[key] if key in dict else d
73
def resolve(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Return an absolute path, resolving via cwd or last_dir if needed.

    path      the path to resolve; a trailing '/.' is dropped first.
    cwd       directory used for '.', './...' and other relative paths.
    last_dir  optional directory tried before cwd for other relative
              paths (ignored when it equals cwd).
    debug     when > 2, each candidate tried is reported to debug_out.

    Absolute paths, '.' and './...' are handled purely textually.
    Any other relative path is returned only if it exists below
    last_dir or cwd.  None is returned when nothing is found, and for
    an empty path.
    """
    if path.endswith('/.'):
        # '/.' on its own is the root, do not reduce it to ''
        path = path[0:-2] or '/'
    if not path:
        # nothing to resolve, and path[0] below would raise IndexError
        return None
    if path[0] == '/':
        return path
    if path == '.':
        return cwd
    if path.startswith('./'):
        return cwd + path[1:]
    if last_dir == cwd:
        # no point checking the same directory twice
        last_dir = None
    for d in [last_dir, cwd]:
        if not d:
            continue
        p = '/'.join([d,path])
        if debug > 2:
            print >> debug_out, "looking for:", p,
        if not os.path.exists(p):
            if debug > 2:
                print >> debug_out, "nope"
            continue
        if debug > 2:
            print >> debug_out, "found:", p
        return p
    return None
103
def abspath(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Resolve path via resolve() and canonicalise it only when required.

    This gets called a lot, so realpath is avoided unless the resolved
    path contains './' beyond its first character, ends in '/..',
    or is itself a symlink.
    A falsy result from resolve() is handed back unchanged.
    """
    resolved = resolve(path, cwd, last_dir, debug, debug_out)
    if not resolved:
        return resolved
    needs_realpath = (resolved.find('./') > 0 or
                      resolved.endswith('/..') or
                      os.path.islink(resolved))
    if needs_realpath:
        return os.path.realpath(resolved)
    return resolved
116
def sort_unique(list, cmp=None, key=None, reverse=False):
    """
    Sort list in place and return a new list with duplicates removed.

    cmp, key and reverse are handed to list.sort().
    The caller's list is sorted but keeps its duplicates; only the
    returned list is unique.
    """
    if cmp is None:
        # the keyword form works on both Python 2 and Python 3
        list.sort(key=key, reverse=reverse)
    else:
        list.sort(cmp, key, reverse)
    nl = []
    for e in list:
        # once sorted, equal entries are adjacent, so comparing with the
        # last entry kept is enough (the previous version never updated
        # its "last element" and so removed nothing)
        if nl and e == nl[-1]:
            continue
        nl.append(e)
    return nl
126
class MetaFile:
    """class to parse meta files generated by bmake."""

    # NOTE: these are class attributes, so the lists and the dict are
    # shared by all instances (until reset() rebinds seen and the
    # *_deps lists on an instance).  main() creates one instance per
    # meta file and reports the accumulated results from the last one.
    conf = None
    dirdep_re = None
    host_target = None
    srctops = []
    objroots = []

    seen = {}
    obj_deps = []
    src_deps = []
    file_deps = []

    def __init__(self, name, conf={}):
        """if name is set we will parse it now.
        conf can have the following keys:

        SRCTOPS list of tops of the src tree(s).

        CURDIR  the src directory 'bmake' was run from.

        RELDIR  the relative path from SRCTOP to CURDIR

        MACHINE the machine we built for.
                set to 'none' if we are not cross-building.

        HOST_TARGET
                when we build for the pseudo machine 'host'
                the object tree uses HOST_TARGET rather than MACHINE.

        OBJROOTS a list of the common prefix for all obj dirs it might
                end in '/' or '-'.

        DPDEPS  names an optional file to which per file dependencies
                will be appended.
                For example if 'some/path/foo.h' is read from SRCTOP
                then 'DPDEPS_some/path/foo.h +=' "RELDIR" is output.
                This can allow 'bmake' to learn all the dirs within
                the tree that depend on 'foo.h'

        debug   desired debug level

        debug_out open file to send debug output to (sys.stderr)

        """

        self.name = name
        self.debug = getv(conf, 'debug', 0)
        self.debug_out = getv(conf, 'debug_out', sys.stderr)

        self.machine = getv(conf, 'MACHINE', '')

        if not self.conf:
            # some of the steps below we want to do only once
            self.conf = conf
            self.host_target = getv(conf, 'HOST_TARGET')
            for srctop in getv(conf, 'SRCTOPS', []):
                # record each top with a trailing '/', both as given
                # and with symlinks resolved
                if srctop[-1] != '/':
                    srctop += '/'
                if not srctop in self.srctops:
                    self.srctops.append(srctop)
                _srctop = os.path.realpath(srctop)
                if _srctop[-1] != '/':
                    _srctop += '/'
                if not _srctop in self.srctops:
                    self.srctops.append(_srctop)

            for objroot in getv(conf, 'OBJROOTS', []):
                for e in ['/' + self.machine, '/' + self.machine + '/']:
                    if objroot.endswith(e):
                        # this is not what we want - fix it
                        objroot = objroot[0:-len(e)]
                        if e.endswith('/'):
                            objroot += '/'
                if not objroot in self.objroots:
                    self.objroots.append(objroot)
                    # realpath drops a trailing '/', put it back
                    _objroot = os.path.realpath(objroot)
                    if objroot[-1] == '/':
                        _objroot += '/'
                    if not _objroot in self.objroots:
                        self.objroots.append(_objroot)

            # we want the longest match
            self.srctops.sort(reverse=True)
            self.objroots.sort(reverse=True)

            if self.debug:
                print >> self.debug_out, "host_target=", self.host_target
                print >> self.debug_out, "srctops=", self.srctops
                print >> self.debug_out, "objroots=", self.objroots

            # splits "machine/rest/of/path" into machine and path
            self.dirdep_re = re.compile(r'([^/]+)/(.+)')

        self.curdir = getv(conf, 'CURDIR')
        self.reldir = getv(conf, 'RELDIR')
        self.dpdeps = getv(conf, 'DPDEPS')
        if self.dpdeps and not self.reldir:
            # per file dependencies need RELDIR, try to derive it
            if self.debug:
                print >> self.debug_out, "need reldir:",
            if self.curdir:
                srctop = self.find_top(self.curdir, self.srctops)
                if srctop:
                    self.reldir = self.curdir.replace(srctop,'')
                    if self.debug:
                        print >> self.debug_out, self.reldir
            if not self.reldir:
                self.dpdeps = None      # we cannot do it?

        if name:
            self.parse()

    def reset(self):
        """reset state if we are being passed meta files from multiple directories."""
        self.seen = {}
        self.obj_deps = []
        self.src_deps = []
        self.file_deps = []

    def dirdeps(self, sep='\n'):
        """return DIRDEPS"""
        return sep.strip() + sep.join(self.obj_deps)

    def src_dirdeps(self, sep='\n'):
        """return SRC_DIRDEPS"""
        return sep.strip() + sep.join(self.src_deps)

    def file_depends(self, out=None):
        """Append DPDEPS_${file} += ${RELDIR}
        for each file we saw, to the output file.
        Nothing is written (and None returned) if RELDIR is not known."""
        if not self.reldir:
            return None
        for f in sort_unique(self.file_deps):
            print >> out, 'DPDEPS_%s += %s' % (f, self.reldir)

    def seenit(self, dir):
        """remember that we have seen dir."""
        self.seen[dir] = 1

    def add(self, list, data, clue=''):
        """add data to list if it isn't already there.
        clue is only used to label the debug output."""
        if data not in list:
            list.append(data)
            if self.debug:
                print >> self.debug_out, "%s: %sAdd: %s" % (self.name, clue, data)

    def find_top(self, path, list):
        """the logical tree may be split across multiple trees,
        return the first entry of list that path starts with, or None."""
        for top in list:
            if path.startswith(top):
                if self.debug > 2:
                    print >> self.debug_out, "found in", top
                return top
        return None

    def find_obj(self, objroot, dir, path, input):
        """return path within objroot, taking care of .dirdep files

        objroot the object root that dir was found in.
        dir     the directory holding path.
        path    the file that was accessed.
        input   the raw path from the meta file, remembered as seen
                when no .dirdep file is found.

        May return None when no dependency can be derived.
        """
        ddep = None
        for ddepf in [path + '.dirdep', dir + '/.dirdep']:
            if not ddep and os.path.exists(ddepf):
                # close the file promptly rather than leak the handle
                with open(ddepf, 'rb') as fp:
                    ddep = fp.readline().strip('# \n')
                if self.debug > 1:
                    print >> self.debug_out, "found %s: %s\n" % (ddepf, ddep)
                # an empty machine would match anything and cost ddep
                # its last character
                if self.machine and ddep.endswith(self.machine):
                    # drop the machine suffix and the separator before it
                    ddep = ddep[0:-(1+len(self.machine))]

        if not ddep:
            # no .dirdeps, so remember that we've seen the raw input
            self.seenit(input)
            self.seenit(dir)
            if self.machine == 'none':
                if dir.startswith(objroot):
                    return dir.replace(objroot,'')
                return None
            m = self.dirdep_re.match(dir.replace(objroot,''))
            if m:
                ddep = m.group(2)
                dmachine = m.group(1)
                if dmachine != self.machine:
                    # built for another machine, qualify the dependency
                    # unless this is just 'host' using HOST_TARGET
                    if not (self.machine == 'host' and
                            dmachine == self.host_target):
                        if self.debug > 2:
                            print >> self.debug_out, "adding .%s to %s" % (dmachine, ddep)
                        ddep += '.' + dmachine

        return ddep

    def parse(self, name=None, file=None):
        """A meta file looks like:

        # Meta data file "path"
        CMD "command-line"
        CWD "cwd"
        TARGET "target"
        -- command output --
        -- filemon acquired metadata --
        # buildmon version 3
        V 3
        C "pid" "cwd"
        E "pid" "path"
        F "pid" "child"
        R "pid" "path"
        W "pid" "path"
        X "pid" "status"
        D "pid" "path"
        L "pid" "src" "target"
        M "pid" "old" "new"
        S "pid" "path"
        # Bye bye

        We go to some effort to avoid processing a dependency more than once.
        Of the above record types only C,E,F,L,R,V and W are of interest.

        name    if set, replaces self.name as the meta file to read.
        file    if set, an already open file to read instead of
                opening self.name; it is left open for the caller and
                self.cwd from an earlier parse is used as the start dir.
        """

        version = 0                     # unknown
        if name:
            self.name = name
        if file:
            f = file
            cwd = last_dir = self.cwd
        else:
            f = open(self.name, 'rb')
        skip = True
        pid_cwd = {}
        pid_last_dir = {}
        last_pid = 0

        if self.curdir:
            self.seenit(self.curdir)    # we ignore this

        interesting = 'CEFLRV'
        try:
            for line in f:
                # ignore anything we don't care about
                if not line[0] in interesting:
                    continue
                if self.debug > 2:
                    print >> self.debug_out, "input:", line,
                w = line.split()

                if skip:
                    # still in the header, waiting for the 'V' record
                    if w[0] == 'V':
                        skip = False
                        version = int(w[1])
                        # disabled:
                        # if version < 4:
                        #     # we cannot ignore 'W' records
                        #     # as they may be 'rw'
                        #     interesting += 'W'
                    elif w[0] == 'CWD':
                        self.cwd = cwd = last_dir = w[1]
                        self.seenit(cwd)    # ignore this
                        if self.debug:
                            print >> self.debug_out, "%s: CWD=%s" % (self.name, cwd)
                    continue

                pid = int(w[1])
                if pid != last_pid:
                    # switching process: save the state of the old one
                    # and pick up the state of the new one
                    if last_pid:
                        pid_cwd[last_pid] = cwd
                        pid_last_dir[last_pid] = last_dir
                    cwd = getv(pid_cwd, pid, self.cwd)
                    last_dir = getv(pid_last_dir, pid, self.cwd)
                    last_pid = pid

                # process operations
                if w[0] == 'F':
                    # the child starts out in the parent's cwd
                    npid = int(w[2])
                    pid_cwd[npid] = cwd
                    pid_last_dir[npid] = cwd
                    last_pid = npid
                    continue
                elif w[0] == 'C':
                    cwd = abspath(w[2], cwd, None, self.debug, self.debug_out)
                    if not cwd:
                        # a relative dir that cannot be found resolves
                        # to None; use the path as recorded rather
                        # than fail on None below
                        cwd = w[2]
                    if cwd.endswith('/.'):
                        cwd = cwd[0:-2]
                    last_dir = cwd
                    if self.debug > 1:
                        print >> self.debug_out, "cwd=", cwd
                    continue

                if w[2] in self.seen:
                    if self.debug > 2:
                        print >> self.debug_out, "seen:", w[2]
                    continue
                # file operations
                if w[0] in 'ML':
                    path = w[2].strip("'")
                else:
                    path = w[2]
                # we are never interested in .dirdep files as dependencies
                if path.endswith('.dirdep'):
                    continue
                # we don't want to resolve the last component if it is
                # a symlink
                path = resolve(path, cwd, last_dir, self.debug, self.debug_out)
                if not path:
                    continue
                dir,base = os.path.split(path)
                if dir in self.seen:
                    if self.debug > 2:
                        print >> self.debug_out, "seen:", dir
                    continue
                # we can have a path in an objdir which is a link
                # to the src dir, we may need to add dependencies for each
                rdir = dir
                dir = abspath(dir, cwd, last_dir, self.debug, self.debug_out)
                if rdir == dir or rdir.find('./') > 0:
                    rdir = None
                # now put path back together
                path = '/'.join([dir,base])
                if self.debug > 1:
                    print >> self.debug_out, "raw=%s rdir=%s dir=%s path=%s" % (w[2], rdir, dir, path)
                if w[0] in 'SRWL':
                    if w[0] == 'W' and path.endswith('.dirdep'):
                        continue
                    if path in [last_dir, cwd, self.cwd, self.curdir]:
                        if self.debug > 1:
                            print >> self.debug_out, "skipping:", path
                        continue
                    if os.path.isdir(path):
                        # a directory is not a dependency, but it is a
                        # clue for resolving later relative paths
                        if w[0] in 'RW':
                            last_dir = path
                        if self.debug > 1:
                            print >> self.debug_out, "ldir=", last_dir
                        continue

                if w[0] in 'REWML':
                    # finally, we get down to it
                    if dir == self.cwd or dir == self.curdir:
                        continue
                    srctop = self.find_top(path, self.srctops)
                    if srctop:
                        if self.dpdeps:
                            self.add(self.file_deps, path.replace(srctop,''), 'file')
                        self.add(self.src_deps, dir.replace(srctop,''), 'src')
                        self.seenit(w[2])
                        self.seenit(dir)
                        if rdir and not rdir.startswith(srctop):
                            dir = rdir      # for below
                            rdir = None
                        else:
                            continue

                    objroot = None
                    for dir in [dir,rdir]:
                        if not dir:
                            continue
                        objroot = self.find_top(dir, self.objroots)
                        if objroot:
                            break
                    if objroot:
                        ddep = self.find_obj(objroot, dir, path, w[2])
                        if ddep:
                            self.add(self.obj_deps, ddep, 'obj')
                    else:
                        # don't waste time looking again
                        self.seenit(w[2])
                        self.seenit(dir)
        finally:
            # only close what we opened, even if parsing failed
            if not file:
                f.close()
488
489
490def main(argv, klass=MetaFile, xopts='', xoptf=None):
491    """Simple driver for class MetaFile.
492
493    Usage:
494    	script [options] [key=value ...] "meta" ...
495
496    Options and key=value pairs contribute to the
497    dictionary passed to MetaFile.
498
499    -S "SRCTOP"
500		add "SRCTOP" to the "SRCTOPS" list.
501
502    -C "CURDIR"
503
504    -O "OBJROOT"
505    		add "OBJROOT" to the "OBJROOTS" list.
506
507    -m "MACHINE"
508
509    -H "HOST_TARGET"
510
511    -D "DPDEPS"
512
513    -d	bumps debug level
514
515    """
516    import getopt
517
518    # import Psyco if we can
519    # it can speed things up quite a bit
520    have_psyco = 0
521    try:
522        import psyco
523        psyco.full()
524        have_psyco = 1
525    except:
526        pass
527
528    conf = {
529        'SRCTOPS': [],
530        'OBJROOTS': [],
531        }
532
533    try:
534        machine = os.environ['MACHINE']
535        if machine:
536            conf['MACHINE'] = machine
537        srctop = os.environ['SB_SRC']
538        if srctop:
539            conf['SRCTOPS'].append(srctop)
540        objroot = os.environ['SB_OBJROOT']
541        if objroot:
542            conf['OBJROOTS'].append(objroot)
543    except:
544        pass
545
546    debug = 0
547    output = True
548
549    opts, args = getopt.getopt(argv[1:], 'dS:C:O:R:m:D:H:q' + xopts)
550    for o, a in opts:
551        if o == '-d':
552            debug += 1
553        elif o == '-q':
554            output = False
555        elif o == '-H':
556            conf['HOST_TARGET'] = a
557        elif o == '-S':
558            if a not in conf['SRCTOPS']:
559                conf['SRCTOPS'].append(a)
560        elif o == '-C':
561            conf['CURDIR'] = a
562        elif o == '-O':
563            if a not in conf['OBJROOTS']:
564                conf['OBJROOTS'].append(a)
565        elif o == '-R':
566            conf['RELDIR'] = a
567        elif o == '-D':
568            conf['DPDEPS'] = a
569        elif o == '-m':
570            conf['MACHINE'] = a
571        elif xoptf:
572            xoptf(o, a, conf)
573
574    conf['debug'] = debug
575
576    # get any var=val assignments
577    eaten = []
578    for a in args:
579        if a.find('=') > 0:
580            k,v = a.split('=')
581            if k in ['SRCTOP','OBJROOT','SRCTOPS','OBJROOTS']:
582                if k == 'SRCTOP':
583                    k = 'SRCTOPS'
584                elif k == 'OBJROOT':
585                    k = 'OBJROOTS'
586                if v not in conf[k]:
587                    conf[k].append(v)
588            else:
589                conf[k] = v
590            eaten.append(a)
591            continue
592        break
593
594    for a in eaten:
595        args.remove(a)
596
597    debug_out = getv(conf, 'debug_out', sys.stderr)
598
599    if debug:
600        print >> debug_out, "config:"
601        print >> debug_out, "psyco=", have_psyco
602        for k,v in conf.items():
603            print >> debug_out, "%s=%s" % (k,v)
604
605    for a in args:
606        m = klass(a, conf)
607
608    if output:
609        print m.dirdeps()
610
611        print m.src_dirdeps('\nsrc:')
612
613        dpdeps = getv(conf, 'DPDEPS')
614        if dpdeps:
615            m.file_depends(open(dpdeps, 'wb'))
616
617    return m
618
619if __name__ == '__main__':
620    try:
621        main(sys.argv)
622    except:
623        # yes, this goes to stdout
624        print "ERROR: ", sys.exc_info()[1]
625        raise
626
627