xref: /freebsd/contrib/bmake/mk/meta2deps.py (revision 36d6566e5985030fd2f1100bd9c1387bbe0bd290)
1#!/usr/bin/env python
2
3from __future__ import print_function
4
5"""
This script parses each "meta" file and extracts the
information needed to deduce build and src dependencies.

It works much the same as the original shell script, but is
*much* more efficient.

The parsing work is handled by the class MetaFile.
We only pay attention to a subset of the information in the
"meta" files.  Specifically:

'CWD'	to initialize our notion of the current directory.

'C'	to track chdir(2) on a per process basis

'R'	files read are what we really care about.
	directories read, provide a clue to resolving
	subsequent relative paths.  That is if we cannot find
	them relative to 'cwd', we check relative to the last
	dir read.

'W'	files opened for write or read-write,
	for filemon V3 and earlier.

'E'	files executed.

'L'	files linked

'V'	the filemon version, this record is used as a clue
	that we have reached the interesting bit.
35
36"""
37
38"""
39RCSid:
40	$Id: meta2deps.py,v 1.34 2020/10/02 03:11:17 sjg Exp $
41
42	Copyright (c) 2011-2020, Simon J. Gerraty
43	Copyright (c) 2011-2017, Juniper Networks, Inc.
44	All rights reserved.
45
46	Redistribution and use in source and binary forms, with or without
47	modification, are permitted provided that the following conditions
48	are met:
49	1. Redistributions of source code must retain the above copyright
50	   notice, this list of conditions and the following disclaimer.
51	2. Redistributions in binary form must reproduce the above copyright
52	   notice, this list of conditions and the following disclaimer in the
53	   documentation and/or other materials provided with the distribution.
54
55	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
56	"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
57	LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
58	A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
59	OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
60	SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
61	LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
62	DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
63	THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
64	(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
65	OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
66
67"""
68
import functools
import os
import re
import sys
70
def getv(dict, key, d=None):
    """Lookup key in dict and return its value, or the supplied default.

    dict is any container supporting ``in`` and item access;
    d is returned when key is not present (a stored None is returned as is).
    """
    return dict[key] if key in dict else d
76
def resolve(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Return an absolute path, resolving via cwd or last_dir if needed.

    path      the path as recorded; a trailing '/.' is dropped first.
    cwd       directory used for '.', './...' and relative lookups.
    last_dir  tried before cwd for relative paths (ignored when it
              equals cwd).
    debug     messages go to debug_out when this is greater than 2.

    Returns None for an absolute path that does not exist and for a
    relative path found in neither last_dir nor cwd.
    '.', './...' and '..' are answered without checking the filesystem.
    """
    if path.endswith('/.'):
        path = path[0:-2]
    if len(path) > 0 and path[0] == '/':
        if os.path.exists(path):
            return path
        if debug > 2:
            print("skipping non-existent:", path, file=debug_out)
        return None
    if path == '.':
        return cwd
    if path.startswith('./'):
        return cwd + path[1:]
    if last_dir == cwd:
        # no point looking in the same place twice
        last_dir = None
    for d in [last_dir, cwd]:
        if not d:
            continue
        if path == '..':
            # parent of the first usable directory
            parent = '/'.join(d.split('/')[:-1])
            return parent or '/'
        p = '/'.join([d, path])
        if debug > 2:
            print("looking for:", p, end=' ', file=debug_out)
        if not os.path.exists(p):
            if debug > 2:
                print("nope", file=debug_out)
            continue
        if debug > 2:
            print("found:", p, file=debug_out)
        return p
    return None
116
def cleanpath(path):
    """cleanup path without using realpath(3)

    Empty and '.' components are dropped and each '..' removes the
    component before it.  A '..' with nothing left to remove ends the
    scan; whatever was collected so far is returned.
    A leading '/' is preserved.
    """
    if path.startswith('/'):
        r = '/'
    else:
        r = ''
    p = []
    for d in path.split('/'):
        if not d or d == '.':
            continue
        if d == '..':
            try:
                p.pop()
                continue
            except IndexError:
                # nothing left to back out of
                break
        p.append(d)

    return r + '/'.join(p)
137
def abspath(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Return path resolved via resolve() and tidied via cleanpath().

    this gets called a lot, so we try to avoid calling realpath.

    Returns None when path is absolute and resolve() could not find it.
    A relative path that cannot be resolved is kept as given.
    cleanpath() is only applied when the result has no '/',
    contains './' past its first character, or ends in '/..'.
    """
    rpath = resolve(path, cwd, last_dir, debug, debug_out)
    if rpath:
        path = rpath
    elif len(path) > 0 and path[0] == '/':
        return None
    if ('/' not in path or
        path.find('./') > 0 or
        path.endswith('/..')):
        path = cleanpath(path)
    return path
153
def sort_unique(list, cmp=None, key=None, reverse=False):
    """Sort list in place and return a new list without adjacent duplicates.

    cmp      optional old-style comparison function (negative, zero or
             positive result); it is applied to the key values when
             key is also given.
    key      optional function extracting the sort key from an element.
    reverse  sort in descending order when true.

    The sort arguments are passed by keyword and cmp is adapted with
    functools.cmp_to_key, because list.sort() on Python 3 accepts
    neither positional arguments nor cmp.
    """
    if cmp is not None:
        cmp_key = functools.cmp_to_key(cmp)
        if key is None:
            sort_key = cmp_key
        else:
            user_key = key
            sort_key = lambda e: cmp_key(user_key(e))
    else:
        sort_key = key
    list.sort(key=sort_key, reverse=reverse)

    nl = []
    # private marker so that no real element (not even None) can be
    # mistaken for "the previous element"
    le = nl
    for e in list:
        if le is not nl and e == le:
            continue
        le = e
        nl.append(e)
    return nl
164
def add_trims(x):
    """Return the four spellings of x to trim: '/x/', '/x', 'x/' and 'x'.

    The most decorated form comes first.
    """
    return ['/' + x + '/',
            '/' + x,
            x + '/',
            x]
170
class MetaFile:
    """class to parse meta files generated by bmake.

    The containers below are defined on the class and are modified in
    place, so what one instance records (seen paths, dependencies,
    srctops, objroots) is also visible to instances created later,
    until reset() gives an instance its own empty containers.
    """

    conf = None
    dirdep_re = None
    host_target = None
    srctops = []
    objroots = []
    excludes = []
    seen = {}
    obj_deps = []
    src_deps = []
    file_deps = []

    def __init__(self, name, conf=None):
        """if name is set we will parse it now.
        conf can have the following keys:

        SRCTOPS list of tops of the src tree(s).

        CURDIR  the src directory 'bmake' was run from.

        RELDIR  the relative path from SRCTOP to CURDIR

        MACHINE the machine we built for.
                set to 'none' if we are not cross-building.
                More specifically if machine cannot be deduced from objdirs.

        TARGET_SPEC
                Sometimes MACHINE isn't enough.

        HOST_TARGET
                when we build for the pseudo machine 'host'
                the object tree uses HOST_TARGET rather than MACHINE.

        OBJROOTS a list of the common prefix for all obj dirs it might
                end in '/' or '-'.

        DPDEPS  names an optional file to which per file dependencies
                will be appended.
                For example if 'some/path/foo.h' is read from SRCTOP
                then 'DPDEPS_some/path/foo.h +=' "RELDIR" is output.
                This can allow 'bmake' to learn all the dirs within
                the tree that depend on 'foo.h'

        EXCLUDES
                A list of paths to ignore.
                ccache(1) can otherwise be trouble.

        debug   desired debug level

        debug_out open file to send debug output to (sys.stderr)

        """
        if conf is None:
            conf = {}

        self.name = name
        self.debug = getv(conf, 'debug', 0)
        self.debug_out = getv(conf, 'debug_out', sys.stderr)

        self.machine = getv(conf, 'MACHINE', '')
        self.machine_arch = getv(conf, 'MACHINE_ARCH', '')
        self.target_spec = getv(conf, 'TARGET_SPEC', '')
        self.curdir = getv(conf, 'CURDIR')
        self.reldir = getv(conf, 'RELDIR')
        self.dpdeps = getv(conf, 'DPDEPS')
        self.line = 0

        if not self.conf:
            # some of the steps below we want to do only once
            self.conf = conf
            self.host_target = getv(conf, 'HOST_TARGET')
            for srctop in getv(conf, 'SRCTOPS', []):
                if not srctop:
                    # an empty entry cannot name a tree
                    continue
                if not srctop.endswith('/'):
                    srctop += '/'
                if srctop not in self.srctops:
                    self.srctops.append(srctop)
                _srctop = os.path.realpath(srctop)
                if not _srctop.endswith('/'):
                    _srctop += '/'
                if _srctop not in self.srctops:
                    self.srctops.append(_srctop)

            # Only non-empty names can be trimmed: an empty name would
            # put '' in the list and objroot[0:-0] yields ''.
            trim_list = []
            if self.machine:
                trim_list += add_trims(self.machine)
            if self.machine == 'host' and self.host_target:
                trim_list += add_trims(self.host_target)
            if self.target_spec:
                trim_list += add_trims(self.target_spec)

            for objroot in getv(conf, 'OBJROOTS', []):
                for e in trim_list:
                    if objroot.endswith(e):
                        # this is not what we want - fix it
                        objroot = objroot[0:-len(e)]

                if not objroot:
                    # nothing left after trimming
                    continue
                if not objroot.endswith('/'):
                    objroot += '/'
                if objroot not in self.objroots:
                    self.objroots.append(objroot)
                    _objroot = os.path.realpath(objroot)
                    if not _objroot.endswith('/'):
                        _objroot += '/'
                    if _objroot not in self.objroots:
                        self.objroots.append(_objroot)

            # we want the longest match
            self.srctops.sort(reverse=True)
            self.objroots.sort(reverse=True)

            self.excludes = getv(conf, 'EXCLUDES', [])

            if self.debug:
                print("host_target=", self.host_target, file=self.debug_out)
                print("srctops=", self.srctops, file=self.debug_out)
                print("objroots=", self.objroots, file=self.debug_out)
                print("excludes=", self.excludes, file=self.debug_out)

            self.dirdep_re = re.compile(r'([^/]+)/(.+)')

        if self.dpdeps and not self.reldir:
            if self.debug:
                print("need reldir:", end=' ', file=self.debug_out)
            if self.curdir:
                srctop = self.find_top(self.curdir, self.srctops)
                if srctop:
                    self.reldir = self._strip_top(self.curdir, srctop)
                    if self.debug:
                        print(self.reldir, file=self.debug_out)
            if not self.reldir:
                self.dpdeps = None      # we cannot do it?

        self.cwd = os.getcwd()          # make sure this is initialized
        self.last_dir = self.cwd

        if name:
            self.try_parse()

    @staticmethod
    def _strip_top(path, top):
        """return path without its leading top.

        Only the prefix is removed, so a later component sequence that
        happens to equal top is left alone.  When path does not start
        with top, every occurrence of top is removed instead.
        """
        if path.startswith(top):
            return path[len(top):]
        return path.replace(top, '')

    def reset(self):
        """reset state if we are being passed meta files from multiple directories."""
        self.seen = {}
        self.obj_deps = []
        self.src_deps = []
        self.file_deps = []

    def dirdeps(self, sep='\n'):
        """return DIRDEPS"""
        return sep.strip() + sep.join(self.obj_deps)

    def src_dirdeps(self, sep='\n'):
        """return SRC_DIRDEPS"""
        return sep.strip() + sep.join(self.src_deps)

    def file_depends(self, out=None):
        """Append DPDEPS_${file} += ${RELDIR}
        for each file we saw, to the output file.

        Nothing is written (and None is returned) without a reldir.
        """
        if not self.reldir:
            return None
        for f in sort_unique(self.file_deps):
            print('DPDEPS_%s += %s' % (f, self.reldir), file=out)
        # these entries provide for reverse DIRDEPS lookup
        for f in self.obj_deps:
            print('DEPDIRS_%s += %s' % (f, self.reldir), file=out)

    def seenit(self, dir):
        """remember that we have seen dir."""
        self.seen[dir] = 1

    def add(self, list, data, clue=''):
        """add data to list if it isn't already there."""
        if data not in list:
            list.append(data)
            if self.debug:
                print("%s: %sAdd: %s" % (self.name, clue, data), file=self.debug_out)

    def find_top(self, path, list):
        """the logical tree may be split across multiple trees

        return the first entry of list that path starts with, or None.
        """
        for top in list:
            if path.startswith(top):
                if self.debug > 2:
                    print("found in", top, file=self.debug_out)
                return top
        return None

    def find_obj(self, objroot, dir, path, input):
        """return path within objroot, taking care of .dirdep files

        The first line of path + '.dirdep' or dir + '/.dirdep' is used
        when one of them exists and is not empty; otherwise the result
        is derived from dir itself and input and dir are marked as seen.
        """
        ddep = None
        for ddepf in [path + '.dirdep', dir + '/.dirdep']:
            if ddep:
                break
            if not os.path.exists(ddepf):
                continue
            with open(ddepf, 'r') as fh:
                ddep = fh.readline().strip('# \n')
            if self.debug > 1:
                print("found %s: %s\n" % (ddepf, ddep), file=self.debug_out)
            # drop the machine (or target_spec) and the character
            # before it; an empty machine must not match, as every
            # string ends with ''
            if self.machine and ddep.endswith(self.machine):
                ddep = ddep[0:-(1+len(self.machine))]
            elif self.target_spec and ddep.endswith(self.target_spec):
                ddep = ddep[0:-(1+len(self.target_spec))]

        if not ddep:
            # no .dirdeps, so remember that we've seen the raw input
            self.seenit(input)
            self.seenit(dir)
            if self.machine == 'none':
                if dir.startswith(objroot):
                    return self._strip_top(dir, objroot)
                return None
            m = self.dirdep_re.match(self._strip_top(dir, objroot))
            if m:
                ddep = m.group(2)
                dmachine = m.group(1)
                if dmachine != self.machine:
                    if not (self.machine == 'host' and
                            dmachine == self.host_target):
                        if self.debug > 2:
                            print("adding .%s to %s" % (dmachine, ddep), file=self.debug_out)
                        ddep += '.' + dmachine

        return ddep

    def try_parse(self, name=None, file=None):
        """give file and line number causing exception"""
        try:
            self.parse(name, file)
        except:
            # give a useful clue, then let the exception continue
            print('{}:{}: '.format(self.name, self.line), end=' ', file=sys.stderr)
            raise

    def parse(self, name=None, file=None):
        """A meta file looks like:

        # Meta data file "path"
        CMD "command-line"
        CWD "cwd"
        TARGET "target"
        -- command output --
        -- filemon acquired metadata --
        # buildmon version 3
        V 3
        C "pid" "cwd"
        E "pid" "path"
        F "pid" "child"
        R "pid" "path"
        W "pid" "path"
        X "pid" "status"
        D "pid" "path"
        L "pid" "src" "target"
        M "pid" "old" "new"
        S "pid" "path"
        # Bye bye

        We go to some effort to avoid processing a dependency more than once.
        Of the above record types only C,E,F,L,R,V and W are of interest.

        name, when given, replaces self.name.
        file, when given, is iterated instead of opening self.name and
        is left open for the caller.
        An AssertionError is raised if no 'V' record was found.
        """

        version = 0                     # unknown
        if name:
            self.name = name
        if file:
            f = file
            cwd = self.last_dir = self.cwd
        else:
            f = open(self.name, 'r')
            cwd = self.cwd              # make sure this is initialized
        try:
            skip = True
            pid_cwd = {}
            pid_last_dir = {}
            last_pid = 0

            self.line = 0
            if self.curdir:
                self.seenit(self.curdir)    # we ignore this

            interesting = 'CEFLRV'
            for line in f:
                self.line += 1
                # ignore anything we don't care about
                if not line or line[0] not in interesting:
                    continue
                if self.debug > 2:
                    print("input:", line, end=' ', file=self.debug_out)
                w = line.split()

                if skip:
                    if w[0] == 'V':
                        skip = False
                        version = int(w[1])
                        # disabled:
                        # if version < 4:
                        #     # we cannot ignore 'W' records
                        #     # as they may be 'rw'
                        #     interesting += 'W'
                    elif w[0] == 'CWD':
                        self.cwd = cwd = self.last_dir = w[1]
                        self.seenit(cwd)    # ignore this
                        if self.debug:
                            print("%s: CWD=%s" % (self.name, cwd), file=self.debug_out)
                    continue

                pid = int(w[1])
                if pid != last_pid:
                    # switch to the state kept for this process
                    if last_pid:
                        pid_last_dir[last_pid] = self.last_dir
                    cwd = getv(pid_cwd, pid, self.cwd)
                    self.last_dir = getv(pid_last_dir, pid, self.cwd)
                    last_pid = pid

                # process operations
                if w[0] == 'F':
                    # the child starts out where the parent is
                    npid = int(w[2])
                    pid_cwd[npid] = cwd
                    pid_last_dir[npid] = cwd
                    last_pid = npid
                    continue
                elif w[0] == 'C':
                    cwd = abspath(w[2], cwd, None, self.debug, self.debug_out)
                    if not cwd:
                        cwd = w[2]
                        if self.debug > 1:
                            print("missing cwd=", cwd, file=self.debug_out)
                    if cwd.endswith('/.'):
                        cwd = cwd[0:-2]
                    self.last_dir = pid_last_dir[pid] = cwd
                    pid_cwd[pid] = cwd
                    if self.debug > 1:
                        print("cwd=", cwd, file=self.debug_out)
                    continue

                if w[2] in self.seen:
                    if self.debug > 2:
                        print("seen:", w[2], file=self.debug_out)
                    continue
                # file operations
                if w[0] in 'ML':
                    # these are special, treat src as read and
                    # target as write
                    self.parse_path(w[2].strip("'"), cwd, 'R', w)
                    self.parse_path(w[3].strip("'"), cwd, 'W', w)
                    continue
                elif w[0] in 'ERWS':
                    path = w[2]
                    if path == '.':
                        continue
                    self.parse_path(path, cwd, w[0], w)

            assert(version > 0)
        finally:
            # only close what we opened, even when parsing fails
            if not file:
                f.close()

    def is_src(self, base, dir, rdir):
        """is base in srctop

        dir and then rdir are tried; for the first one under a srctop
        the src (and, with dpdeps, file) dependency is recorded, the
        directory is marked as seen and True is returned.
        """
        for dir in [dir,rdir]:
            if not dir:
                continue
            path = '/'.join([dir,base])
            srctop = self.find_top(path, self.srctops)
            if srctop:
                if self.dpdeps:
                    self.add(self.file_deps, self._strip_top(path, srctop), 'file')
                self.add(self.src_deps, self._strip_top(dir, srctop), 'src')
                self.seenit(dir)
                return True
        return False

    def parse_path(self, path, cwd, op=None, w=None):
        """look at a path for the op specified

        w is the split record the path came from; w[0] supplies op
        when op is not given and w[2] is the raw path that is
        remembered as seen.
        """
        if w is None:
            w = []

        if not op:
            op = w[0]

        # we are never interested in .dirdep files as dependencies
        if path.endswith('.dirdep'):
            return
        for p in self.excludes:
            if p and path.startswith(p):
                if self.debug > 2:
                    print("exclude:", p, path, file=self.debug_out)
                return
        # we don't want to resolve the last component if it is
        # a symlink
        path = resolve(path, cwd, self.last_dir, self.debug, self.debug_out)
        if not path:
            return
        dir,base = os.path.split(path)
        if dir in self.seen:
            if self.debug > 2:
                print("seen:", dir, file=self.debug_out)
            return
        # we can have a path in an objdir which is a link
        # to the src dir, we may need to add dependencies for each
        rdir = None
        adir = abspath(dir, cwd, self.last_dir, self.debug, self.debug_out)
        if adir:
            dir = adir
            rdir = os.path.realpath(dir)
            if rdir == dir:
                rdir = None
        # else: abspath() could not find dir (resolve() does not check
        # './' and '..' results), so keep dir as it is
        # now put path back together
        path = '/'.join([dir,base])
        if self.debug > 1:
            print("raw=%s rdir=%s dir=%s path=%s" % (w[2], rdir, dir, path), file=self.debug_out)
        if op in 'RWS':
            if path in [self.last_dir, cwd, self.cwd, self.curdir]:
                if self.debug > 1:
                    print("skipping:", path, file=self.debug_out)
                return
            if os.path.isdir(path):
                if op in 'RW':
                    self.last_dir = path
                if self.debug > 1:
                    print("ldir=", self.last_dir, file=self.debug_out)
                return

        if op in 'ER':
            # finally, we get down to it
            if dir == self.cwd or dir == self.curdir:
                return
            if self.is_src(base, dir, rdir):
                self.seenit(w[2])
                if not rdir:
                    return

            objroot = None
            objdir = None
            for d in [dir,rdir]:
                if not d:
                    continue
                objroot = self.find_top(d, self.objroots)
                if objroot:
                    objdir = d
                    break
            if objroot:
                ddep = self.find_obj(objroot, objdir, path, w[2])
                if ddep:
                    self.add(self.obj_deps, ddep, 'obj')
                    if self.dpdeps and objroot.endswith('/stage/'):
                        sp = '/'.join(self._strip_top(path, objroot).split('/')[1:])
                        self.add(self.file_deps, sp, 'file')
            else:
                # don't waste time looking again
                self.seenit(w[2])
                # NOTE(review): this records rdir (possibly None), as
                # the loop above always ended on it; presumably dir was
                # meant.  Kept as is: marking dir as seen would also
                # stop later directory reads below it from updating
                # last_dir - confirm before changing.
                self.seenit(rdir)
606
607
def main(argv, klass=MetaFile, xopts='', xoptf=None):
    """Simple driver for class MetaFile.

    Usage:
        script [options] [key=value ...] "meta" ...

    Options and key=value pairs contribute to the
    dictionary passed to MetaFile.

    -S "SRCTOP"
                add "SRCTOP" to the "SRCTOPS" list.

    -C "CURDIR"

    -O "OBJROOT"
                add "OBJROOT" to the "OBJROOTS" list.

    -R "RELDIR"

    -m "MACHINE"

    -a "MACHINE_ARCH"

    -T "TARGET_SPEC"

    -H "HOST_TARGET"

    -D "DPDEPS"

    -X "EXCLUDE"
                add "EXCLUDE" to the "EXCLUDES" list.

    -q  do not print the results

    -d  bumps debug level

    MACHINE, MACHINE_ARCH, SB_SRC and SB_OBJROOT from the environment
    provide initial values.

    Arguments ending in '.meta' are parsed; an argument starting with
    '@' names a file listing meta files to parse.

    klass is the class instantiated for each meta file.
    xopts are extra getopt option characters; options not handled here
    are passed to xoptf(option, value, conf) when xoptf is given.

    Returns the last klass instance created, or None.
    """
    import getopt

    # import Psyco if we can
    # it can speed things up quite a bit
    have_psyco = 0
    try:
        import psyco
        psyco.full()
        have_psyco = 1
    except Exception:
        # purely optional
        pass

    conf = {
        'SRCTOPS': [],
        'OBJROOTS': [],
        'EXCLUDES': [],
        }

    # each variable is looked up on its own, so one that is not set
    # does not stop the others from being used
    machine = os.environ.get('MACHINE')
    if machine:
        conf['MACHINE'] = machine
    machine_arch = os.environ.get('MACHINE_ARCH')
    if machine_arch:
        conf['MACHINE_ARCH'] = machine_arch
    srctop = os.environ.get('SB_SRC')
    if srctop:
        conf['SRCTOPS'].append(srctop)
    objroot = os.environ.get('SB_OBJROOT')
    if objroot:
        conf['OBJROOTS'].append(objroot)

    debug = 0
    output = True

    opts, args = getopt.getopt(argv[1:], 'a:dS:C:O:R:m:D:H:qT:X:' + xopts)
    for o, a in opts:
        if o == '-a':
            conf['MACHINE_ARCH'] = a
        elif o == '-d':
            debug += 1
        elif o == '-q':
            output = False
        elif o == '-H':
            conf['HOST_TARGET'] = a
        elif o == '-S':
            if a not in conf['SRCTOPS']:
                conf['SRCTOPS'].append(a)
        elif o == '-C':
            conf['CURDIR'] = a
        elif o == '-O':
            if a not in conf['OBJROOTS']:
                conf['OBJROOTS'].append(a)
        elif o == '-R':
            conf['RELDIR'] = a
        elif o == '-D':
            conf['DPDEPS'] = a
        elif o == '-m':
            conf['MACHINE'] = a
        elif o == '-T':
            conf['TARGET_SPEC'] = a
        elif o == '-X':
            if a not in conf['EXCLUDES']:
                conf['EXCLUDES'].append(a)
        elif xoptf:
            xoptf(o, a, conf)

    conf['debug'] = debug

    # get any var=val assignments, they come before the meta files
    eaten = 0
    for a in args:
        if a.find('=') <= 0:
            break
        # split on the first '=' only, the value may contain more
        k, v = a.split('=', 1)
        if k in ['SRCTOP','OBJROOT','SRCTOPS','OBJROOTS']:
            if k == 'SRCTOP':
                k = 'SRCTOPS'
            elif k == 'OBJROOT':
                k = 'OBJROOTS'
            if v not in conf[k]:
                conf[k].append(v)
        else:
            conf[k] = v
        eaten += 1

    del args[:eaten]

    debug_out = getv(conf, 'debug_out', sys.stderr)

    if debug:
        print("config:", file=debug_out)
        print("psyco=", have_psyco, file=debug_out)
        for k,v in list(conf.items()):
            print("%s=%s" % (k,v), file=debug_out)

    m = None
    for a in args:
        if a.endswith('.meta'):
            if not os.path.exists(a):
                continue
            m = klass(a, conf)
        elif a.startswith('@'):
            # there can actually be multiple files per line
            with open(a[1:]) as listing:
                for line in listing:
                    for f in line.strip().split():
                        if not os.path.exists(f):
                            continue
                        m = klass(f, conf)

    if output and m:
        print(m.dirdeps())

        print(m.src_dirdeps('\nsrc:'))

        dpdeps = getv(conf, 'DPDEPS')
        if dpdeps:
            # text mode: print() cannot write str to a binary file
            with open(dpdeps, 'w') as out:
                m.file_depends(out)

    return m
760
if __name__ == '__main__':
    try:
        main(sys.argv)
    except:
        # yes, this goes to stdout
        # the exception itself is re-raised after the summary line
        print("ERROR: ", sys.exc_info()[1])
        raise
768
769