xref: /freebsd/contrib/bmake/mk/meta2deps.py (revision 72143e89bb430c3b1406b399b810806904f6c882)
1#!/usr/bin/env python
2
3from __future__ import print_function
4
5"""
6This script parses each "meta" file and extracts the
7information needed to deduce build and src dependencies.
8
9It works much the same as the original shell script, but is
10*much* more efficient.
11
12The parsing work is handled by the class MetaFile.
13We only pay attention to a subset of the information in the
14"meta" files.  Specifically:
15
16'CWD'	to initialize our notion.
17
18'C'	to track chdir(2) on a per process basis
19
20'R'	files read are what we really care about.
21	directories read, provide a clue to resolving
22	subsequent relative paths.  That is if we cannot find
23	them relative to 'cwd', we check relative to the last
24	dir read.
25
26'W'	files opened for write or read-write,
27	for filemon V3 and earlier.
28
29'E'	files executed.
30
31'L'	files linked
32
33'V'	the filemon version, this record is used as a clue
34	that we have reached the interesting bit.
35
36"""
37
38"""
39RCSid:
40	$Id: meta2deps.py,v 1.33 2020/08/19 17:51:53 sjg Exp $
41
42	Copyright (c) 2011-2020, Simon J. Gerraty
43	Copyright (c) 2011-2017, Juniper Networks, Inc.
44	All rights reserved.
45
46	Redistribution and use in source and binary forms, with or without
47	modification, are permitted provided that the following conditions
48	are met:
49	1. Redistributions of source code must retain the above copyright
50	   notice, this list of conditions and the following disclaimer.
51	2. Redistributions in binary form must reproduce the above copyright
52	   notice, this list of conditions and the following disclaimer in the
53	   documentation and/or other materials provided with the distribution.
54
55	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
56	"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
57	LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
58	A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
59	OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
60	SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
61	LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
62	DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
63	THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
64	(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
65	OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
66
67"""
68
69import os, re, sys
70
def getv(dict, key, d=None):
    """Return dict[key] when key is present, otherwise the default d."""
    return dict[key] if key in dict else d
76
def resolve(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Resolve path against cwd (and last_dir) without calling realpath.

    An absolute path is returned as is when it exists, otherwise None.
    '.' and './x' are mapped onto cwd without touching the filesystem.
    '..' yields the parent of the first usable base directory.
    Any other relative path is looked for under last_dir and then cwd;
    the first candidate that exists is returned, None if neither does.
    Progress is reported on debug_out when debug > 2.
    """
    verbose = debug > 2

    # a trailing '/.' adds nothing
    if path.endswith('/.'):
        path = path[:-2]

    if path[:1] == '/':
        if os.path.exists(path):
            return path
        if verbose:
            print("skipping non-existent:", path, file=debug_out)
        return None

    if path == '.':
        return cwd
    if path.startswith('./'):
        return cwd + path[1:]

    # last_dir takes precedence, but there is no point trying cwd twice
    if last_dir == cwd:
        bases = [cwd]
    else:
        bases = [last_dir, cwd]

    for base in bases:
        if not base:
            continue
        if path == '..':
            # drop the last component of base
            parent = base.rpartition('/')[0]
            return parent if parent else '/'
        candidate = base + '/' + path
        if verbose:
            print("looking for:", candidate, end=' ', file=debug_out)
        if os.path.exists(candidate):
            if verbose:
                print("found:", candidate, file=debug_out)
            return candidate
        if verbose:
            print("nope", file=debug_out)
    return None
116
def cleanpath(path):
    """
    Normalize path textually, without using realpath(3).

    Empty and '.' components are dropped and each '..' removes the
    component before it.  If a '..' would climb above the start of the
    path, processing stops there and the rest of path is discarded.
    A leading '/' is preserved.
    """
    prefix = '/' if path.startswith('/') else ''
    kept = []
    for comp in path.split('/'):
        if comp in ('', '.'):
            continue
        if comp != '..':
            kept.append(comp)
            continue
        if not kept:
            # nothing left to back out of
            break
        kept.pop()

    return prefix + '/'.join(kept)
137
def abspath(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Resolve path via cwd or last_dir and tidy up the result.

    This gets called a lot, so realpath is deliberately avoided.
    Returns None for an absolute path that resolve() could not find.
    A relative path that could not be resolved is kept as given.
    The result is passed through cleanpath() only when it has no '/',
    contains './' past its first character, or ends in '/..'.
    """
    found = resolve(path, cwd, last_dir, debug, debug_out)
    if found:
        path = found
    elif path[:1] == '/':
        return None

    needs_cleaning = ('/' not in path or
                      path.find('./') > 0 or
                      path.endswith('/..'))
    if needs_cleaning:
        return cleanpath(path)
    return path
153
def sort_unique(list, cmp=None, key=None, reverse=False):
    """
    Sort list in place and return a new list without adjacent duplicates.

    list     the list to sort; it is modified in place.
    cmp      optional old-style comparison function (negative, zero or
             positive result); applied to the key values when key is
             also given.
    key      optional function extracting the sort key of an element.
    reverse  sort in descending order when true.

    list.sort() no longer accepts positional arguments or cmp in
    Python 3, so cmp is adapted with functools.cmp_to_key and the
    remaining arguments are passed by keyword.
    """
    if cmp is not None:
        from functools import cmp_to_key
        cmp_key = cmp_to_key(cmp)
        if key is None:
            sort_key = cmp_key
        else:
            user_key = key
            sort_key = lambda e: cmp_key(user_key(e))
    else:
        sort_key = key
    list.sort(key=sort_key, reverse=reverse)

    nl = []
    for e in list:
        # compare with the last element kept, so a leading None survives
        if nl and e == nl[-1]:
            continue
        nl.append(e)
    return nl
164
def add_trims(x):
    """Return x wrapped in each combination of leading/trailing '/',
    longest form first: '/x/', '/x', 'x/', 'x'."""
    wrappers = (('/', '/'), ('/', ''), ('', '/'), ('', ''))
    return [lead + x + trail for lead, trail in wrappers]
170
class MetaFile:
    """Parse "meta" files generated by bmake and collect dependencies.

    The containers below are class attributes that are updated in
    place (list.append, dict item assignment), so the dependencies
    accumulate across every instance until reset() rebinds them on a
    particular instance.
    """

    conf = None
    dirdep_re = None
    host_target = None
    srctops = []
    objroots = []
    excludes = []
    seen = {}
    obj_deps = []
    src_deps = []
    file_deps = []

    def __init__(self, name, conf={}):
        """if name is set we will parse it now.
        conf can have the following keys:

        SRCTOPS list of tops of the src tree(s).

        CURDIR  the src directory 'bmake' was run from.

        RELDIR  the relative path from SRCTOP to CURDIR

        MACHINE the machine we built for.
                set to 'none' if we are not cross-building.
                More specifically if machine cannot be deduced from objdirs.

        MACHINE_ARCH
                recorded as self.machine_arch.

        TARGET_SPEC
                Sometimes MACHINE isn't enough.

        HOST_TARGET
                when we build for the pseudo machine 'host'
                the object tree uses HOST_TARGET rather than MACHINE.

        OBJROOTS a list of the common prefix for all obj dirs it might
                end in '/' or '-'.

        DPDEPS  names an optional file to which per file dependencies
                will be written.
                For example if 'some/path/foo.h' is read from SRCTOP
                then 'DPDEPS_some/path/foo.h +=' "RELDIR" is output.
                This can allow 'bmake' to learn all the dirs within
                the tree that depend on 'foo.h'

        EXCLUDES
                A list of paths to ignore.
                ccache(1) can otherwise be trouble.

        debug   desired debug level

        debug_out open file to send debug output to (sys.stderr)

        """

        self.name = name
        self.debug = getv(conf, 'debug', 0)
        self.debug_out = getv(conf, 'debug_out', sys.stderr)

        self.machine = getv(conf, 'MACHINE', '')
        self.machine_arch = getv(conf, 'MACHINE_ARCH', '')
        self.target_spec = getv(conf, 'TARGET_SPEC', '')
        self.curdir = getv(conf, 'CURDIR')
        self.reldir = getv(conf, 'RELDIR')
        self.dpdeps = getv(conf, 'DPDEPS')
        self.line = 0

        if not self.conf:
            # some of the steps below we want to do only once
            # NOTE(review): self.conf is assigned on the instance, so the
            # class attribute stays None and this branch is taken for
            # each new instance; the 'not in' checks keep the shared
            # lists free of duplicates.
            self.conf = conf
            self.host_target = getv(conf, 'HOST_TARGET')
            for srctop in getv(conf, 'SRCTOPS', []):
                if not srctop:
                    continue            # nothing to match against
                if srctop[-1] != '/':
                    srctop += '/'
                if srctop not in self.srctops:
                    self.srctops.append(srctop)
                # the tree may be reached via symlinks too
                _srctop = os.path.realpath(srctop)
                if _srctop[-1] != '/':
                    _srctop += '/'
                if _srctop not in self.srctops:
                    self.srctops.append(_srctop)

            trim_list = add_trims(self.machine)
            # add_trims() needs a string, HOST_TARGET may be unset
            if self.machine == 'host' and self.host_target:
                trim_list += add_trims(self.host_target)
            if self.target_spec:
                trim_list += add_trims(self.target_spec)

            for objroot in getv(conf, 'OBJROOTS', []):
                for e in trim_list:
                    # an empty trim (MACHINE unset) matches everything
                    # and objroot[0:-0] would empty the string
                    if e and objroot.endswith(e):
                        # this is not what we want - fix it
                        objroot = objroot[0:-len(e)]

                if not objroot:
                    continue            # nothing left after trimming
                if objroot[-1] != '/':
                    objroot += '/'
                if objroot not in self.objroots:
                    self.objroots.append(objroot)
                    _objroot = os.path.realpath(objroot)
                    # realpath drops the trailing '/' except for '/'
                    if _objroot[-1] != '/':
                        _objroot += '/'
                    if _objroot not in self.objroots:
                        self.objroots.append(_objroot)

            # we want the longest match
            self.srctops.sort(reverse=True)
            self.objroots.sort(reverse=True)

            self.excludes = getv(conf, 'EXCLUDES', [])

            if self.debug:
                print("host_target=", self.host_target, file=self.debug_out)
                print("srctops=", self.srctops, file=self.debug_out)
                print("objroots=", self.objroots, file=self.debug_out)
                print("excludes=", self.excludes, file=self.debug_out)

            # splits 'machine/rel/dir' into machine and rel/dir
            self.dirdep_re = re.compile(r'([^/]+)/(.+)')

        if self.dpdeps and not self.reldir:
            if self.debug:
                print("need reldir:", end=' ', file=self.debug_out)
            if self.curdir:
                srctop = self.find_top(self.curdir, self.srctops)
                if srctop:
                    self.reldir = self.curdir.replace(srctop,'')
                    if self.debug:
                        print(self.reldir, file=self.debug_out)
            if not self.reldir:
                self.dpdeps = None      # we cannot do it?

        self.cwd = os.getcwd()          # make sure this is initialized
        self.last_dir = self.cwd

        if name:
            self.try_parse()

    def reset(self):
        """reset state if we are being passed meta files from multiple directories."""
        self.seen = {}
        self.obj_deps = []
        self.src_deps = []
        self.file_deps = []

    def dirdeps(self, sep='\n'):
        """return DIRDEPS, joined by sep and prefixed with sep.strip()"""
        return sep.strip() + sep.join(self.obj_deps)

    def src_dirdeps(self, sep='\n'):
        """return SRC_DIRDEPS, joined by sep and prefixed with sep.strip()"""
        return sep.strip() + sep.join(self.src_deps)

    def file_depends(self, out=None):
        """Write DPDEPS_${file} += ${RELDIR}
        for each file we saw, to the output file.
        Does nothing when RELDIR is not known."""
        if not self.reldir:
            return None
        for f in sort_unique(self.file_deps):
            print('DPDEPS_%s += %s' % (f, self.reldir), file=out)
        # these entries provide for reverse DIRDEPS lookup
        for f in self.obj_deps:
            print('DEPDIRS_%s += %s' % (f, self.reldir), file=out)

    def seenit(self, dir):
        """remember that we have seen dir."""
        self.seen[dir] = 1

    def add(self, list, data, clue=''):
        """add data to list if it isn't already there.
        clue labels the debug output."""
        if data not in list:
            list.append(data)
            if self.debug:
                print("%s: %sAdd: %s" % (self.name, clue, data), file=self.debug_out)

    def find_top(self, path, list):
        """the logical tree may be split across multiple trees.
        Return the first entry of list that path starts with, or None."""
        for top in list:
            if path.startswith(top):
                if self.debug > 2:
                    print("found in", top, file=self.debug_out)
                return top
        return None

    def find_obj(self, objroot, dir, path, input):
        """return path within objroot, taking care of .dirdep files

        objroot  the object root that dir was found under.
        dir      directory of the file being considered.
        path     full path of the file being considered.
        input    the raw path from the meta file, remembered as seen
                 when no .dirdep file applies.

        Returns the directory dependency or None.
        """
        ddep = None
        for ddepf in [path + '.dirdep', dir + '/.dirdep']:
            if not ddep and os.path.exists(ddepf):
                with open(ddepf, 'r') as fh:
                    ddep = fh.readline().strip('# \n')
                if self.debug > 1:
                    print("found %s: %s\n" % (ddepf, ddep), file=self.debug_out)
                # drop the '.machine' suffix; an empty machine must not
                # match or the last character of ddep would be lost
                if self.machine and ddep.endswith(self.machine):
                    ddep = ddep[0:-(1+len(self.machine))]
                elif self.target_spec and ddep.endswith(self.target_spec):
                    ddep = ddep[0:-(1+len(self.target_spec))]

        if not ddep:
            # no .dirdeps, so remember that we've seen the raw input
            self.seenit(input)
            self.seenit(dir)
            if self.machine == 'none':
                if dir.startswith(objroot):
                    return dir.replace(objroot,'')
                return None
            m = self.dirdep_re.match(dir.replace(objroot,''))
            if m:
                ddep = m.group(2)
                dmachine = m.group(1)
                if dmachine != self.machine:
                    if not (self.machine == 'host' and
                            dmachine == self.host_target):
                        if self.debug > 2:
                            print("adding .%s to %s" % (dmachine, ddep), file=self.debug_out)
                        ddep += '.' + dmachine

        return ddep

    def try_parse(self, name=None, file=None):
        """call parse(), printing the file and line number being
        processed to sys.stderr before re-raising any exception"""
        try:
            self.parse(name, file)
        except BaseException:
            # give a useful clue
            print('{}:{}: '.format(self.name, self.line), end=' ', file=sys.stderr)
            raise

    def parse(self, name=None, file=None):
        """A meta file looks like:

        # Meta data file "path"
        CMD "command-line"
        CWD "cwd"
        TARGET "target"
        -- command output --
        -- filemon acquired metadata --
        # buildmon version 3
        V 3
        C "pid" "cwd"
        E "pid" "path"
        F "pid" "child"
        R "pid" "path"
        W "pid" "path"
        X "pid" "status"
        D "pid" "path"
        L "pid" "src" "target"
        M "pid" "old" "new"
        S "pid" "path"
        # Bye bye

        We go to some effort to avoid processing a dependency more than once.
        Of the above record types only those starting with one of
        C,E,F,L,R and V are looked at; 'W' handling is currently disabled.

        name    replaces self.name when set.
        file    an already open file to read instead of opening
                self.name; it is left open for the caller.
        """

        version = 0                     # unknown
        if name:
            self.name = name
        if file:
            f = file
            cwd = self.last_dir = self.cwd
        else:
            f = open(self.name, 'r')
        skip = True
        pid_cwd = {}
        pid_last_dir = {}
        last_pid = 0

        self.line = 0
        if self.curdir:
            self.seenit(self.curdir)    # we ignore this

        interesting = 'CEFLRV'
        try:
            for line in f:
                self.line += 1
                # ignore anything we don't care about
                if not line[0] in interesting:
                    continue
                if self.debug > 2:
                    print("input:", line, end=' ', file=self.debug_out)
                w = line.split()

                if skip:
                    # nothing is processed until the 'V' record
                    if w[0] == 'V':
                        skip = False
                        version = int(w[1])
                        # disabled:
                        # if version < 4:
                        #     # we cannot ignore 'W' records
                        #     # as they may be 'rw'
                        #     interesting += 'W'
                    elif w[0] == 'CWD':
                        self.cwd = cwd = self.last_dir = w[1]
                        self.seenit(cwd)    # ignore this
                        if self.debug:
                            print("%s: CWD=%s" % (self.name, cwd), file=self.debug_out)
                    continue

                pid = int(w[1])
                if pid != last_pid:
                    # switch to the cwd and last_dir of this process
                    if last_pid:
                        pid_last_dir[last_pid] = self.last_dir
                    cwd = getv(pid_cwd, pid, self.cwd)
                    self.last_dir = getv(pid_last_dir, pid, self.cwd)
                    last_pid = pid

                # process operations
                if w[0] == 'F':
                    # the child starts out in the cwd of its parent
                    npid = int(w[2])
                    pid_cwd[npid] = cwd
                    pid_last_dir[npid] = cwd
                    last_pid = npid
                    continue
                elif w[0] == 'C':
                    cwd = abspath(w[2], cwd, None, self.debug, self.debug_out)
                    if not cwd:
                        cwd = w[2]
                        if self.debug > 1:
                            print("missing cwd=", cwd, file=self.debug_out)
                    if cwd.endswith('/.'):
                        cwd = cwd[0:-2]
                    self.last_dir = pid_last_dir[pid] = cwd
                    pid_cwd[pid] = cwd
                    if self.debug > 1:
                        print("cwd=", cwd, file=self.debug_out)
                    continue

                if w[2] in self.seen:
                    if self.debug > 2:
                        print("seen:", w[2], file=self.debug_out)
                    continue
                # file operations
                if w[0] in 'ML':
                    # these are special, treat src as read and
                    # target as write
                    self.parse_path(w[2].strip("'"), cwd, 'R', w)
                    self.parse_path(w[3].strip("'"), cwd, 'W', w)
                    continue
                elif w[0] in 'ERWS':
                    path = w[2]
                    if path == '.':
                        continue
                    self.parse_path(path, cwd, w[0], w)
        finally:
            # only close what we opened, even if a record blew up
            if not file:
                f.close()

    def is_src(self, base, dir, rdir):
        """is base in srctop

        dir and rdir (which may be None) are tried in turn; on the first
        match the src (and optionally file) dependency is recorded and
        True is returned."""
        for dir in [dir,rdir]:
            if not dir:
                continue
            path = '/'.join([dir,base])
            srctop = self.find_top(path, self.srctops)
            if srctop:
                if self.dpdeps:
                    self.add(self.file_deps, path.replace(srctop,''), 'file')
                self.add(self.src_deps, dir.replace(srctop,''), 'src')
                self.seenit(dir)
                return True
        return False

    def parse_path(self, path, cwd, op=None, w=[]):
        """look at a path for the op specified

        path    the path from the record.
        cwd     current directory of the process the record belongs to.
        op      the operation; defaults to w[0].
        w       the words of the record, w[2] being the raw path.
        """

        if not op:
            op = w[0]

        # we are never interested in .dirdep files as dependencies
        if path.endswith('.dirdep'):
            return
        for p in self.excludes:
            if p and path.startswith(p):
                if self.debug > 2:
                    print("exclude:", p, path, file=self.debug_out)
                return
        # we don't want to resolve the last component if it is
        # a symlink
        path = resolve(path, cwd, self.last_dir, self.debug, self.debug_out)
        if not path:
            return
        dir,base = os.path.split(path)
        if dir in self.seen:
            if self.debug > 2:
                print("seen:", dir, file=self.debug_out)
            return
        # we can have a path in an objdir which is a link
        # to the src dir, we may need to add dependencies for each
        dir = abspath(dir, cwd, self.last_dir, self.debug, self.debug_out)
        rdir = os.path.realpath(dir)
        if rdir == dir:
            rdir = None
        # now put path back together
        path = '/'.join([dir,base])
        if self.debug > 1:
            print("raw=%s rdir=%s dir=%s path=%s" % (w[2], rdir, dir, path), file=self.debug_out)
        if op in 'RWS':
            if path in [self.last_dir, cwd, self.cwd, self.curdir]:
                if self.debug > 1:
                    print("skipping:", path, file=self.debug_out)
                return
            if os.path.isdir(path):
                # a directory is only a clue for later relative paths
                if op in 'RW':
                    self.last_dir = path
                if self.debug > 1:
                    print("ldir=", self.last_dir, file=self.debug_out)
                return

        if op in 'ER':
            # finally, we get down to it
            if dir == self.cwd or dir == self.curdir:
                return
            if self.is_src(base, dir, rdir):
                self.seenit(w[2])
                if not rdir:
                    return

            objroot = None
            # NOTE(review): this loop rebinds dir; when nothing matches
            # it ends up as rdir (possibly None), which is what the
            # seenit(dir) in the else branch below records.
            for dir in [dir,rdir]:
                if not dir:
                    continue
                objroot = self.find_top(dir, self.objroots)
                if objroot:
                    break
            if objroot:
                ddep = self.find_obj(objroot, dir, path, w[2])
                if ddep:
                    self.add(self.obj_deps, ddep, 'obj')
                    if self.dpdeps and objroot.endswith('/stage/'):
                        sp = '/'.join(path.replace(objroot,'').split('/')[1:])
                        self.add(self.file_deps, sp, 'file')
            else:
                # don't waste time looking again
                self.seenit(w[2])
                self.seenit(dir)
605
606
def main(argv, klass=MetaFile, xopts='', xoptf=None):
    """Simple driver for class MetaFile.

    Usage:
        script [options] [key=value ...] "meta" ...

    Options and key=value pairs contribute to the
    dictionary passed to MetaFile.

    -S "SRCTOP"
                add "SRCTOP" to the "SRCTOPS" list.

    -C "CURDIR"

    -O "OBJROOT"
                add "OBJROOT" to the "OBJROOTS" list.

    -R "RELDIR"

    -m "MACHINE"

    -a "MACHINE_ARCH"

    -H "HOST_TARGET"

    -T "TARGET_SPEC"

    -D "DPDEPS"

    -X "EXCLUDE"
                add "EXCLUDE" to the "EXCLUDES" list.

    -q  suppress the normal output

    -d  bumps debug level

    MACHINE, MACHINE_ARCH, SB_SRC and SB_OBJROOT from the environment
    provide initial values.

    An argument ending in '.meta' names a meta file, '@file' names a
    file containing a list of meta files.

    klass   the class to instantiate for each meta file.
    xopts   extra getopt option letters, handled by xoptf.
    xoptf   called as xoptf(option, argument, conf) for options not
            handled here.

    Returns the last klass instance created, or None.
    """
    import getopt

    # import Psyco if we can
    # it can speed things up quite a bit
    have_psyco = 0
    try:
        import psyco
        psyco.full()
        have_psyco = 1
    except Exception:
        pass

    conf = {
        'SRCTOPS': [],
        'OBJROOTS': [],
        'EXCLUDES': [],
        }

    # each variable is optional and independent of the others,
    # a missing one must not stop us looking at the rest
    machine = os.environ.get('MACHINE')
    if machine:
        conf['MACHINE'] = machine
    machine_arch = os.environ.get('MACHINE_ARCH')
    if machine_arch:
        conf['MACHINE_ARCH'] = machine_arch
    srctop = os.environ.get('SB_SRC')
    if srctop:
        conf['SRCTOPS'].append(srctop)
    objroot = os.environ.get('SB_OBJROOT')
    if objroot:
        conf['OBJROOTS'].append(objroot)

    debug = 0
    output = True

    opts, args = getopt.getopt(argv[1:], 'a:dS:C:O:R:m:D:H:qT:X:' + xopts)
    for o, a in opts:
        if o == '-a':
            conf['MACHINE_ARCH'] = a
        elif o == '-d':
            debug += 1
        elif o == '-q':
            output = False
        elif o == '-H':
            conf['HOST_TARGET'] = a
        elif o == '-S':
            if a not in conf['SRCTOPS']:
                conf['SRCTOPS'].append(a)
        elif o == '-C':
            conf['CURDIR'] = a
        elif o == '-O':
            if a not in conf['OBJROOTS']:
                conf['OBJROOTS'].append(a)
        elif o == '-R':
            conf['RELDIR'] = a
        elif o == '-D':
            conf['DPDEPS'] = a
        elif o == '-m':
            conf['MACHINE'] = a
        elif o == '-T':
            conf['TARGET_SPEC'] = a
        elif o == '-X':
            if a not in conf['EXCLUDES']:
                conf['EXCLUDES'].append(a)
        elif xoptf:
            xoptf(o, a, conf)

    conf['debug'] = debug

    # get any var=val assignments, they must precede the meta files
    eaten = 0
    for a in args:
        if a.find('=') <= 0:
            break
        # the value may itself contain '='
        k,v = a.split('=', 1)
        if k in ['SRCTOP','OBJROOT','SRCTOPS','OBJROOTS']:
            if k == 'SRCTOP':
                k = 'SRCTOPS'
            elif k == 'OBJROOT':
                k = 'OBJROOTS'
            if v not in conf[k]:
                conf[k].append(v)
        else:
            conf[k] = v
        eaten += 1

    args = args[eaten:]

    debug_out = getv(conf, 'debug_out', sys.stderr)

    if debug:
        print("config:", file=debug_out)
        print("psyco=", have_psyco, file=debug_out)
        for k,v in list(conf.items()):
            print("%s=%s" % (k,v), file=debug_out)

    m = None
    for a in args:
        if a.endswith('.meta'):
            if not os.path.exists(a):
                continue
            m = klass(a, conf)
        elif a.startswith('@'):
            # there can actually multiple files per line
            with open(a[1:]) as flist:
                for line in flist:
                    for f in line.strip().split():
                        if not os.path.exists(f):
                            continue
                        m = klass(f, conf)

    if output and m:
        print(m.dirdeps())

        print(m.src_dirdeps('\nsrc:'))

        dpdeps = getv(conf, 'DPDEPS')
        if dpdeps:
            # text mode: print() cannot write str to a binary file
            with open(dpdeps, 'w') as out:
                m.file_depends(out)

    return m
759
if __name__ == '__main__':
    # run as a script: report any failure and let it propagate
    try:
        main(sys.argv)
    except BaseException as err:
        # yes, this goes to stdout
        print("ERROR: ", err)
        raise
767
768