xref: /freebsd/contrib/bmake/mk/meta2deps.py (revision 99282790b7d01ec3c4072621d46a0d7302517ad4)
1#!/usr/bin/env python
2
3from __future__ import print_function
4
5"""
6This script parses each "meta" file and extracts the
7information needed to deduce build and src dependencies.
8
9It works much the same as the original shell script, but is
10*much* more efficient.
11
12The parsing work is handled by the class MetaFile.
13We only pay attention to a subset of the information in the
14"meta" files.  Specifically:
15
16'CWD'	to initialize our notion.
17
18'C'	to track chdir(2) on a per process basis
19
20'R'	files read are what we really care about.
21	directories read, provide a clue to resolving
22	subsequent relative paths.  That is if we cannot find
23	them relative to 'cwd', we check relative to the last
24	dir read.
25
26'W'	files opened for write or read-write,
27	for filemon V3 and earlier.
28
29'E'	files executed.
30
31'L'	files linked
32
33'V'	the filemon version, this record is used as a clue
34	that we have reached the interesting bit.
35
36"""
37
38"""
39RCSid:
40	$Id: meta2deps.py,v 1.30 2020/06/08 23:05:00 sjg Exp $
41
42	Copyright (c) 2011-2019, Simon J. Gerraty
43	Copyright (c) 2011-2017, Juniper Networks, Inc.
44	All rights reserved.
45
46	Redistribution and use in source and binary forms, with or without
47	modification, are permitted provided that the following conditions
48	are met:
49	1. Redistributions of source code must retain the above copyright
50	   notice, this list of conditions and the following disclaimer.
51	2. Redistributions in binary form must reproduce the above copyright
52	   notice, this list of conditions and the following disclaimer in the
53	   documentation and/or other materials provided with the distribution.
54
55	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
56	"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
57	LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
58	A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
59	OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
60	SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
61	LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
62	DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
63	THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
64	(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
65	OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
66
67"""
68
69import os, re, sys
70
def getv(dict, key, d=None):
    """Return dict[key] when key is present, otherwise the fallback d."""
    return dict[key] if key in dict else d
76
def resolve(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Turn path into an absolute path, or return None if it cannot be found.

    A trailing '/.' is dropped first.  Absolute paths are returned only
    if they exist.  '.' and './...' are taken relative to cwd without
    checking the filesystem.  Anything else is looked up under last_dir
    (unless it equals cwd) and then under cwd; '..' yields the parent of
    the first usable directory.
    """
    verbose = debug > 2

    if path.endswith('/.'):
        path = path[:-2]

    if path.startswith('/'):
        if os.path.exists(path):
            return path
        if verbose:
            print("skipping non-existent:", path, file=debug_out)
        return None

    if path == '.':
        return cwd
    if path.startswith('./'):
        return cwd + path[1:]

    # no point trying the same directory twice
    if last_dir == cwd:
        candidates = [cwd]
    else:
        candidates = [last_dir, cwd]

    for base in candidates:
        if not base:
            continue
        if path == '..':
            # everything before the last '/' of base, or '/' if that is empty
            parent = base.rpartition('/')[0]
            return parent or '/'
        candidate = '/'.join([base, path])
        if verbose:
            print("looking for:", candidate, end=' ', file=debug_out)
        if os.path.exists(candidate):
            if verbose:
                print("found:", candidate, file=debug_out)
            return candidate
        if verbose:
            print("nope", file=debug_out)
    return None
116
def cleanpath(path):
    """Tidy up path purely textually, i.e. without realpath(3).

    Empty and '.' components are dropped and each '..' removes the
    component before it.  A '..' with nothing left to remove ends the
    scan, discarding whatever follows it.
    """
    prefix = '/' if path.startswith('/') else ''
    kept = []
    for part in path.split('/'):
        if part in ('', '.'):
            continue
        if part != '..':
            kept.append(part)
        elif kept:
            kept.pop()
        else:
            # cannot go above the start of the path; give up here
            break
    return prefix + '/'.join(kept)
137
def abspath(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Resolve path via resolve() and tidy the result when needed.

    Returns None for an absolute path that resolve() could not find.
    An unresolved relative path is kept as given.  This is called a
    lot, so cleanpath() is applied only when the path has no '/',
    contains './' past its first character, or ends in '/..' --
    realpath is never used.
    """
    resolved = resolve(path, cwd, last_dir, debug, debug_out)
    if resolved:
        path = resolved
    elif path.startswith('/'):
        return None

    needs_cleanup = ('/' not in path
                     or path.find('./') > 0
                     or path.endswith('/..'))
    return cleanpath(path) if needs_cleanup else path
153
def sort_unique(list, cmp=None, key=None, reverse=False):
    """Sort list in place and return a new list without adjacent duplicates.

    list    the list to sort; it is modified in place.
    cmp     optional old-style two-argument comparison function.
    key     optional one-argument key function.
    reverse sort in descending order when true.

    Python 3's list.sort() accepts only the keyword arguments key and
    reverse, so cmp is converted with functools.cmp_to_key rather than
    being passed positionally.  When both cmp and key are given, cmp
    compares the key values, as Python 2 did.
    """
    if cmp is not None:
        import functools
        cmp_key = functools.cmp_to_key(cmp)
        if key is None:
            key = cmp_key
        else:
            item_key = key
            key = lambda e: cmp_key(item_key(e))
    list.sort(key=key, reverse=reverse)

    nl = []
    for e in list:
        # compare against the last kept element rather than a None
        # sentinel, so that a leading None entry is not dropped
        if nl and e == nl[-1]:
            continue
        nl.append(e)
    return nl
164
def add_trims(x):
    """Return x with and without leading/trailing '/', longest form first."""
    leading = '/' + x
    trailing = x + '/'
    return [leading + '/', leading, trailing, x]
170
class MetaFile:
    """class to parse meta files generated by bmake.

    The list/dict attributes below live on the class, so they are shared
    by all instances until reset() rebinds them on an instance.  main()
    creates one instance per meta file and reports only the last one,
    so it relies on that accumulation.
    """

    conf = None
    dirdep_re = None
    host_target = None
    srctops = []
    objroots = []
    excludes = []
    seen = {}
    obj_deps = []
    src_deps = []
    file_deps = []

    def __init__(self, name, conf=None):
        """if name is set we will parse it now.
        conf can have the following keys:

        SRCTOPS list of tops of the src tree(s).

        CURDIR  the src directory 'bmake' was run from.

        RELDIR  the relative path from SRCTOP to CURDIR

        MACHINE the machine we built for.
                set to 'none' if we are not cross-building.
                More specifically if machine cannot be deduced from objdirs.

        MACHINE_ARCH
                stored as self.machine_arch.

        TARGET_SPEC
                Sometimes MACHINE isn't enough.

        HOST_TARGET
                when we build for the pseudo machine 'host'
                the object tree uses HOST_TARGET rather than MACHINE.

        OBJROOTS a list of the common prefix for all obj dirs it might
                end in '/' or '-'.

        DPDEPS  names an optional file to which per file dependencies
                will be written.
                For example if 'some/path/foo.h' is read from SRCTOP
                then 'DPDEPS_some/path/foo.h +=' "RELDIR" is output.
                This can allow 'bmake' to learn all the dirs within
                the tree that depend on 'foo.h'

        EXCLUDES
                A list of paths to ignore.
                ccache(1) can otherwise be trouble.

        debug   desired debug level

        debug_out open file to send debug output to (sys.stderr)

        """

        # a fresh dict per call; a {} default would be shared by all calls
        if conf is None:
            conf = {}

        self.name = name
        self.debug = getv(conf, 'debug', 0)
        self.debug_out = getv(conf, 'debug_out', sys.stderr)

        self.machine = getv(conf, 'MACHINE', '')
        self.machine_arch = getv(conf, 'MACHINE_ARCH', '')
        self.target_spec = getv(conf, 'TARGET_SPEC', '')
        self.curdir = getv(conf, 'CURDIR')
        self.reldir = getv(conf, 'RELDIR')
        self.dpdeps = getv(conf, 'DPDEPS')
        self.line = 0

        if not self.conf:
            # some of the steps below we want to do only once
            self.conf = conf
            self.host_target = getv(conf, 'HOST_TARGET')
            for srctop in getv(conf, 'SRCTOPS', []):
                if not srctop:
                    continue            # nothing useful in an empty entry
                if srctop[-1] != '/':
                    srctop += '/'
                if not srctop in self.srctops:
                    self.srctops.append(srctop)
                _srctop = os.path.realpath(srctop)
                if _srctop[-1] != '/':
                    _srctop += '/'
                if not _srctop in self.srctops:
                    self.srctops.append(_srctop)

            trim_list = add_trims(self.machine)
            # HOST_TARGET may be unset; add_trims cannot take None
            if self.machine == 'host' and self.host_target:
                trim_list += add_trims(self.host_target)
            if self.target_spec:
                trim_list += add_trims(self.target_spec)

            for objroot in getv(conf, 'OBJROOTS', []):
                for e in trim_list:
                    # when MACHINE is empty the trims are only slashes or
                    # '', which match anything and would slice objroot
                    # down to nothing - skip those
                    if e.strip('/') and objroot.endswith(e):
                        # this is not what we want - fix it
                        objroot = objroot[0:-len(e)]

                if not objroot:
                    continue            # empty, or nothing left after trimming
                if objroot[-1] != '/':
                    objroot += '/'
                if not objroot in self.objroots:
                    self.objroots.append(objroot)
                    _objroot = os.path.realpath(objroot)
                    if _objroot[-1] != '/':
                        _objroot += '/'
                    if not _objroot in self.objroots:
                        self.objroots.append(_objroot)

            # we want the longest match
            self.srctops.sort(reverse=True)
            self.objroots.sort(reverse=True)

            self.excludes = getv(conf, 'EXCLUDES', [])

            if self.debug:
                print("host_target=", self.host_target, file=self.debug_out)
                print("srctops=", self.srctops, file=self.debug_out)
                print("objroots=", self.objroots, file=self.debug_out)
                print("excludes=", self.excludes, file=self.debug_out)

            # splits 'machine/rest/of/path' into (machine, rest/of/path)
            self.dirdep_re = re.compile(r'([^/]+)/(.+)')

        if self.dpdeps and not self.reldir:
            if self.debug:
                print("need reldir:", end=' ', file=self.debug_out)
            if self.curdir:
                srctop = self.find_top(self.curdir, self.srctops)
                if srctop:
                    self.reldir = self.curdir.replace(srctop,'')
                    if self.debug:
                        print(self.reldir, file=self.debug_out)
            if not self.reldir:
                self.dpdeps = None      # we cannot do it?

        self.cwd = os.getcwd()          # make sure this is initialized
        self.last_dir = self.cwd

        if name:
            self.try_parse()

    def reset(self):
        """reset state if we are being passed meta files from multiple directories."""
        self.seen = {}
        self.obj_deps = []
        self.src_deps = []
        self.file_deps = []

    def dirdeps(self, sep='\n'):
        """return DIRDEPS, joined by sep and prefixed with sep.strip()"""
        return sep.strip() + sep.join(self.obj_deps)

    def src_dirdeps(self, sep='\n'):
        """return SRC_DIRDEPS, joined by sep and prefixed with sep.strip()"""
        return sep.strip() + sep.join(self.src_deps)

    def file_depends(self, out=None):
        """Write DPDEPS_${file} += ${RELDIR}
        for each file we saw, to the output file.
        Does nothing (returns None) when RELDIR is not known."""
        if not self.reldir:
            return None
        for f in sort_unique(self.file_deps):
            print('DPDEPS_%s += %s' % (f, self.reldir), file=out)
        # these entries provide for reverse DIRDEPS lookup
        for f in self.obj_deps:
            print('DEPDIRS_%s += %s' % (f, self.reldir), file=out)

    def seenit(self, dir):
        """remember that we have seen dir."""
        self.seen[dir] = 1

    def add(self, list, data, clue=''):
        """add data to list if it isn't already there."""
        if data not in list:
            list.append(data)
            if self.debug:
                print("%s: %sAdd: %s" % (self.name, clue, data), file=self.debug_out)

    def find_top(self, path, list):
        """the logical tree may be split across multiple trees;
        return the first entry of list that path starts with, or None."""
        for top in list:
            if path.startswith(top):
                if self.debug > 2:
                    print("found in", top, file=self.debug_out)
                return top
        return None

    def find_obj(self, objroot, dir, path, input):
        """return path within objroot, taking care of .dirdep files"""
        ddep = None
        for ddepf in [path + '.dirdep', dir + '/.dirdep']:
            if not ddep and os.path.exists(ddepf):
                # only the first line matters; close the file promptly
                with open(ddepf, 'r') as fp:
                    ddep = fp.readline().strip('# \n')
                if self.debug > 1:
                    print("found %s: %s\n" % (ddepf, ddep), file=self.debug_out)
                # drop the machine (or target_spec) suffix and the
                # separator in front of it.  An empty machine must not
                # match: endswith('') is always true and would chop the
                # last character off ddep.
                if self.machine and ddep.endswith(self.machine):
                    ddep = ddep[0:-(1+len(self.machine))]
                elif self.target_spec and ddep.endswith(self.target_spec):
                    ddep = ddep[0:-(1+len(self.target_spec))]

        if not ddep:
            # no .dirdeps, so remember that we've seen the raw input
            self.seenit(input)
            self.seenit(dir)
            if self.machine == 'none':
                if dir.startswith(objroot):
                    return dir.replace(objroot,'')
                return None
            m = self.dirdep_re.match(dir.replace(objroot,''))
            if m:
                ddep = m.group(2)
                dmachine = m.group(1)
                if dmachine != self.machine:
                    if not (self.machine == 'host' and
                            dmachine == self.host_target):
                        if self.debug > 2:
                            print("adding .%s to %s" % (dmachine, ddep), file=self.debug_out)
                        ddep += '.' + dmachine

        return ddep

    def try_parse(self, name=None, file=None):
        """parse, and on any exception report the file and line number
        being processed before re-raising it"""
        try:
            self.parse(name, file)
        except:
            # give a useful clue
            print('{}:{}: '.format(self.name, self.line), end=' ', file=sys.stderr)
            raise

    def parse(self, name=None, file=None):
        """A meta file looks like:

        # Meta data file "path"
        CMD "command-line"
        CWD "cwd"
        TARGET "target"
        -- command output --
        -- filemon acquired metadata --
        # buildmon version 3
        V 3
        C "pid" "cwd"
        E "pid" "path"
        F "pid" "child"
        R "pid" "path"
        W "pid" "path"
        X "pid" "status"
        D "pid" "path"
        L "pid" "src" "target"
        M "pid" "old" "new"
        S "pid" "path"
        # Bye bye

        We go to some effort to avoid processing a dependency more than once.
        Only lines starting with one of C,E,F,L,R,V are looked at.

        name    if set, replaces self.name as the file to read.
        file    an already open file to read instead of opening
                self.name; it is left open for the caller.
        """

        version = 0                     # unknown
        if name:
            self.name = name
        cwd = self.cwd                  # until a CWD or C record says otherwise
        if file:
            f = file
            self.last_dir = cwd
        else:
            f = open(self.name, 'r')
        skip = True
        pid_cwd = {}
        pid_last_dir = {}
        last_pid = 0

        self.line = 0
        if self.curdir:
            self.seenit(self.curdir)    # we ignore this

        interesting = 'CEFLRV'
        try:
            for line in f:
                self.line += 1
                # ignore anything we don't care about
                if not line[0] in interesting:
                    continue
                if self.debug > 2:
                    print("input:", line, end=' ', file=self.debug_out)
                w = line.split()

                if skip:
                    # everything before the 'V' record is header or
                    # command output; only CWD is of interest there
                    if w[0] == 'V':
                        skip = False
                        # NOTE: 'W' records are not added to interesting,
                        # whatever the filemon version is
                        version = int(w[1])
                    elif w[0] == 'CWD':
                        self.cwd = cwd = self.last_dir = w[1]
                        self.seenit(cwd)    # ignore this
                        if self.debug:
                            print("%s: CWD=%s" % (self.name, cwd), file=self.debug_out)
                    continue

                pid = int(w[1])
                if pid != last_pid:
                    # switch to the cwd/last_dir tracked for this process
                    if last_pid:
                        pid_last_dir[last_pid] = self.last_dir
                    cwd = getv(pid_cwd, pid, self.cwd)
                    self.last_dir = getv(pid_last_dir, pid, self.cwd)
                    last_pid = pid

                # process operations
                if w[0] == 'F':
                    # the child starts out in its parent's cwd
                    npid = int(w[2])
                    pid_cwd[npid] = cwd
                    pid_last_dir[npid] = cwd
                    last_pid = npid
                    continue
                elif w[0] == 'C':
                    cwd = abspath(w[2], cwd, None, self.debug, self.debug_out)
                    if not cwd:
                        cwd = w[2]
                        if self.debug > 1:
                            print("missing cwd=", cwd, file=self.debug_out)
                    if cwd.endswith('/.'):
                        cwd = cwd[0:-2]
                    self.last_dir = pid_last_dir[pid] = cwd
                    pid_cwd[pid] = cwd
                    if self.debug > 1:
                        print("cwd=", cwd, file=self.debug_out)
                    continue

                if w[2] in self.seen:
                    if self.debug > 2:
                        print("seen:", w[2], file=self.debug_out)
                    continue
                # file operations
                if w[0] in 'ML':
                    # these are special, treat src as read and
                    # target as write
                    self.parse_path(w[2].strip("'"), cwd, 'R', w)
                    self.parse_path(w[3].strip("'"), cwd, 'W', w)
                    continue
                elif w[0] in 'ERWS':
                    path = w[2]
                    self.parse_path(path, cwd, w[0], w)
        finally:
            # close what we opened, even if a record failed to parse
            if not file:
                f.close()

    def is_src(self, base, dir, rdir):
        """is base in srctop; if so record the src (and file) dependency"""
        for d in [dir, rdir]:
            if not d:
                continue
            path = '/'.join([d, base])
            srctop = self.find_top(path, self.srctops)
            if srctop:
                if self.dpdeps:
                    self.add(self.file_deps, path.replace(srctop,''), 'file')
                self.add(self.src_deps, d.replace(srctop,''), 'src')
                self.seenit(d)
                return True
        return False

    def parse_path(self, path, cwd, op=None, w=None):
        """look at a path for the op specified

        path    the path from the record.
        cwd     the current directory of the process concerned.
        op      the record type; defaults to w[0].
        w       the split record; w[2] is the raw path we remember
                as seen.  Without it, path itself is used.
        """

        if w is None:
            w = []
        if not op:
            op = w[0]
        if len(w) > 2:
            raw = w[2]
        else:
            raw = path

        # we are never interested in .dirdep files as dependencies
        if path.endswith('.dirdep'):
            return
        for p in self.excludes:
            if p and path.startswith(p):
                if self.debug > 2:
                    print("exclude:", p, path, file=self.debug_out)
                return
        # we don't want to resolve the last component if it is
        # a symlink
        path = resolve(path, cwd, self.last_dir, self.debug, self.debug_out)
        if not path:
            return
        dir,base = os.path.split(path)
        if dir in self.seen:
            if self.debug > 2:
                print("seen:", dir, file=self.debug_out)
            return
        # we can have a path in an objdir which is a link
        # to the src dir, we may need to add dependencies for each
        dir = abspath(dir, cwd, self.last_dir, self.debug, self.debug_out)
        rdir = os.path.realpath(dir)
        if rdir == dir:
            rdir = None
        # now put path back together
        path = '/'.join([dir,base])
        if self.debug > 1:
            print("raw=%s rdir=%s dir=%s path=%s" % (raw, rdir, dir, path), file=self.debug_out)
        if op in 'RWS':
            if path in [self.last_dir, cwd, self.cwd, self.curdir]:
                if self.debug > 1:
                    print("skipping:", path, file=self.debug_out)
                return
            if os.path.isdir(path):
                if op in 'RW':
                    self.last_dir = path
                if self.debug > 1:
                    print("ldir=", self.last_dir, file=self.debug_out)
                return

        if op in 'ER':
            # finally, we get down to it
            if dir == self.cwd or dir == self.curdir:
                return
            if self.is_src(base, dir, rdir):
                self.seenit(raw)
                if not rdir:
                    return

            objroot = None
            d = None
            for d in [dir, rdir]:
                if not d:
                    continue
                objroot = self.find_top(d, self.objroots)
                if objroot:
                    break
            if objroot:
                # d is whichever of dir/rdir matched objroot
                ddep = self.find_obj(objroot, d, path, raw)
                if ddep:
                    self.add(self.obj_deps, ddep, 'obj')
                    if self.dpdeps and objroot.endswith('/stage/'):
                        sp = '/'.join(path.replace(objroot,'').split('/')[1:])
                        self.add(self.file_deps, sp, 'file')
            else:
                # don't waste time looking again
                self.seenit(raw)
                # NOTE(review): d is the last candidate tried, i.e. rdir,
                # which is None whenever dir is its own realpath - so dir
                # itself is not marked here.  Kept as is, since marking
                # dir would also skip directory reads that update
                # last_dir; confirm the intent before changing.
                self.seenit(d)
603
604
def main(argv, klass=MetaFile, xopts='', xoptf=None):
    """Simple driver for class MetaFile.

    Usage:
        script [options] [key=value ...] "meta" ...

    Options and key=value pairs contribute to the
    dictionary passed to MetaFile.

    -S "SRCTOP"
                add "SRCTOP" to the "SRCTOPS" list.

    -C "CURDIR"

    -O "OBJROOT"
                add "OBJROOT" to the "OBJROOTS" list.

    -R "RELDIR"

    -m "MACHINE"

    -a "MACHINE_ARCH"

    -H "HOST_TARGET"

    -T "TARGET_SPEC"

    -D "DPDEPS"

    -X "EXCLUDE"
                add "EXCLUDE" to the "EXCLUDES" list.

    -q  suppress the normal output

    -d  bumps debug level

    The environment variables MACHINE, MACHINE_ARCH, SB_SRC and
    SB_OBJROOT provide initial values.

    Arguments ending in '.meta' are parsed directly; an argument of the
    form '@file' names a file listing meta files to parse.

    xopts   extra getopt option letters.
    xoptf   called as xoptf(option, value, conf) for any option not
            handled here.

    Returns the last klass instance created, or None.
    """
    import getopt

    # import Psyco if we can
    # it can speed things up quite a bit
    have_psyco = 0
    try:
        import psyco
        psyco.full()
        have_psyco = 1
    except Exception:
        pass

    conf = {
        'SRCTOPS': [],
        'OBJROOTS': [],
        'EXCLUDES': [],
        }

    # each variable is looked up on its own, so that one which is
    # missing does not cause the others to be ignored
    for env_var in ('MACHINE', 'MACHINE_ARCH'):
        val = os.environ.get(env_var)
        if val:
            conf[env_var] = val
    for env_var, key in (('SB_SRC', 'SRCTOPS'), ('SB_OBJROOT', 'OBJROOTS')):
        val = os.environ.get(env_var)
        if val:
            conf[key].append(val)

    debug = 0
    output = True

    opts, args = getopt.getopt(argv[1:], 'a:dS:C:O:R:m:D:H:qT:X:' + xopts)
    for o, a in opts:
        if o == '-a':
            conf['MACHINE_ARCH'] = a
        elif o == '-d':
            debug += 1
        elif o == '-q':
            output = False
        elif o == '-H':
            conf['HOST_TARGET'] = a
        elif o == '-S':
            if a not in conf['SRCTOPS']:
                conf['SRCTOPS'].append(a)
        elif o == '-C':
            conf['CURDIR'] = a
        elif o == '-O':
            if a not in conf['OBJROOTS']:
                conf['OBJROOTS'].append(a)
        elif o == '-R':
            conf['RELDIR'] = a
        elif o == '-D':
            conf['DPDEPS'] = a
        elif o == '-m':
            conf['MACHINE'] = a
        elif o == '-T':
            conf['TARGET_SPEC'] = a
        elif o == '-X':
            if a not in conf['EXCLUDES']:
                conf['EXCLUDES'].append(a)
        elif xoptf:
            xoptf(o, a, conf)

    conf['debug'] = debug

    # get any var=val assignments; they must precede the meta files
    eaten = 0
    for a in args:
        if a.find('=') <= 0:
            break
        # split on the first '=' only, the value may contain more
        k, v = a.split('=', 1)
        if k in ['SRCTOP','OBJROOT','SRCTOPS','OBJROOTS']:
            if k == 'SRCTOP':
                k = 'SRCTOPS'
            elif k == 'OBJROOT':
                k = 'OBJROOTS'
            if v not in conf[k]:
                conf[k].append(v)
        else:
            conf[k] = v
        eaten += 1

    # the assignments are all at the front of args
    del args[:eaten]

    debug_out = getv(conf, 'debug_out', sys.stderr)

    if debug:
        print("config:", file=debug_out)
        print("psyco=", have_psyco, file=debug_out)
        for k, v in conf.items():
            print("%s=%s" % (k,v), file=debug_out)

    m = None
    for a in args:
        if a.endswith('.meta'):
            if not os.path.exists(a):
                continue
            m = klass(a, conf)
        elif a.startswith('@'):
            # there can actually be multiple files per line
            with open(a[1:]) as listing:
                for line in listing:
                    for f in line.split():
                        if not os.path.exists(f):
                            continue
                        m = klass(f, conf)

    if output and m:
        print(m.dirdeps())

        print(m.src_dirdeps('\nsrc:'))

        dpdeps = getv(conf, 'DPDEPS')
        if dpdeps:
            # text mode: print() cannot write str to a binary file
            # on Python 3.  Any existing file is replaced, as before.
            with open(dpdeps, 'w') as out:
                m.file_depends(out)

    return m
757
if __name__ == '__main__':
    # Run as a script: report any failure on stdout as well, then let
    # the exception propagate.
    try:
        main(sys.argv)
    except:
        # yes, this goes to stdout
        err = sys.exc_info()[1]
        print("ERROR: ", err)
        raise
765
766