xref: /freebsd/contrib/bmake/mk/meta2deps.py (revision fe6060f10f634930ff71b7c50291ddc610da2475)
1#!/usr/bin/env python
2
3from __future__ import print_function
4
5"""
6This script parses each "meta" file and extracts the
7information needed to deduce build and src dependencies.
8
9It works much the same as the original shell script, but is
10*much* more efficient.
11
12The parsing work is handled by the class MetaFile.
13We only pay attention to a subset of the information in the
14"meta" files.  Specifically:
15
'CWD'	to initialize our notion of the current working directory.
17
18'C'	to track chdir(2) on a per process basis
19
20'R'	files read are what we really care about.
21	directories read, provide a clue to resolving
22	subsequent relative paths.  That is if we cannot find
23	them relative to 'cwd', we check relative to the last
24	dir read.
25
26'W'	files opened for write or read-write,
27	for filemon V3 and earlier.
28
29'E'	files executed.
30
31'L'	files linked
32
33'V'	the filemon version, this record is used as a clue
34	that we have reached the interesting bit.
35
36"""
37
38"""
39RCSid:
40	$Id: meta2deps.py,v 1.38 2021/06/17 05:20:08 sjg Exp $
41
42	Copyright (c) 2011-2020, Simon J. Gerraty
43	Copyright (c) 2011-2017, Juniper Networks, Inc.
44	All rights reserved.
45
46	Redistribution and use in source and binary forms, with or without
47	modification, are permitted provided that the following conditions
48	are met:
49	1. Redistributions of source code must retain the above copyright
50	   notice, this list of conditions and the following disclaimer.
51	2. Redistributions in binary form must reproduce the above copyright
52	   notice, this list of conditions and the following disclaimer in the
53	   documentation and/or other materials provided with the distribution.
54
55	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
56	"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
57	LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
58	A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
59	OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
60	SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
61	LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
62	DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
63	THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
64	(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
65	OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
66
67"""
68
69import os, re, sys
70
def resolve(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Return an absolute path, resolving via cwd or last_dir if needed.

    path      the (possibly relative) path to resolve.
    cwd       directory that relative paths are resolved against.
    last_dir  optional directory tried before cwd; it is ignored
              when it is the same as cwd.
    debug     when > 2 each candidate is reported to debug_out.

    Returns None if an absolute path does not exist, or if a relative
    path cannot be found below last_dir or cwd.
    '.', './x' and '..' are resolved textually, without checking
    that the result exists.
    """
    if path.endswith('/.'):
        # '/.' is the root itself; without the fallback it would
        # become '' and then be looked up as a relative path.
        path = path[0:-2] or '/'
    if path.startswith('/'):
        if os.path.exists(path):
            return path
        if debug > 2:
            print("skipping non-existent:", path, file=debug_out)
        return None
    if path == '.':
        return cwd
    if path.startswith('./'):
        return cwd + path[1:]
    if last_dir == cwd:
        # no point looking in the same place twice
        last_dir = None
    for d in [last_dir, cwd]:
        if not d:
            continue
        if path == '..':
            # parent of the first usable directory
            p = '/'.join(d.split('/')[:-1])
            if not p:
                p = '/'
            return p
        p = '/'.join([d, path])
        if debug > 2:
            print("looking for:", p, end=' ', file=debug_out)
        if not os.path.exists(p):
            if debug > 2:
                print("nope", file=debug_out)
            continue
        if debug > 2:
            print("found:", p, file=debug_out)
        return p
    return None
110
def cleanpath(path):
    """Normalize path textually, without using realpath(3).

    Empty and '.' components are dropped and each '..' removes the
    component before it.  A '..' with nothing left to remove stops
    processing, so it and everything after it is discarded.
    A leading '/' is preserved.
    """
    prefix = '/' if path.startswith('/') else ''
    kept = []
    for comp in path.split('/'):
        if comp in ('', '.'):
            continue
        if comp != '..':
            kept.append(comp)
        elif kept:
            kept.pop()
        else:
            # nothing left to back out of, give up on the remainder
            break

    return prefix + '/'.join(kept)
131
def abspath(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Return an absolute path, resolving via cwd or last_dir if needed.

    This gets called a lot, so realpath is avoided: the path is only
    tidied with cleanpath() when it has no '/' at all, has './'
    somewhere after its first character, or ends in '/..'.

    Returns None for an absolute path that resolve() rejects.
    A relative path that resolve() cannot find is still returned
    (after the same tidy-up).
    """
    resolved = resolve(path, cwd, last_dir, debug, debug_out)
    if resolved:
        path = resolved
    elif path.startswith('/'):
        return None
    needs_cleaning = ('/' not in path
                      or path.find('./') > 0
                      or path.endswith('/..'))
    if needs_cleaning:
        return cleanpath(path)
    return path
147
148def sort_unique(list, cmp=None, key=None, reverse=False):
149    if sys.version_info[0] == 2:
150        list.sort(cmp, key, reverse)
151    else:
152        list.sort(reverse=reverse)
153    nl = []
154    le = None
155    for e in list:
156        if e == le:
157            continue
158        le = e
159        nl.append(e)
160    return nl
161
def add_trims(x):
    """Return the suffixes to trim for x, most specific first:
    '/x/', '/x', 'x/' and x itself."""
    leading = '/' + x
    trailing = x + '/'
    return [leading + '/', leading, trailing, x]
167
class MetaFile:
    """class to parse meta files generated by bmake.

    The containers declared below are class attributes which add()
    and seenit() update in place, so (until reset() is called on an
    instance) every instance sees and extends the same data.
    """

    conf = None
    dirdep_re = None
    host_target = None
    srctops = []
    objroots = []
    excludes = []
    seen = {}
    obj_deps = []
    src_deps = []
    file_deps = []

    def __init__(self, name, conf=None):
        """if name is set we will parse it now.
        conf can have the following keys:

        SRCTOPS list of tops of the src tree(s).

        CURDIR  the src directory 'bmake' was run from.

        RELDIR  the relative path from SRCTOP to CURDIR

        MACHINE the machine we built for.
                set to 'none' if we are not cross-building.
                More specifically if machine cannot be deduced from objdirs.

        MACHINE_ARCH
                recorded as self.machine_arch.

        TARGET_SPEC
                Sometimes MACHINE isn't enough.

        HOST_TARGET
                when we build for the pseudo machine 'host'
                the object tree uses HOST_TARGET rather than MACHINE.

        OBJROOTS a list of the common prefix for all obj dirs it might
                end in '/' or '-'.

        DPDEPS  names an optional file to which per file dependencies
                will be appended.
                For example if 'some/path/foo.h' is read from SRCTOP
                then 'DPDEPS_some/path/foo.h +=' "RELDIR" is output.
                This can allow 'bmake' to learn all the dirs within
                the tree that depend on 'foo.h'

        EXCLUDES
                A list of paths to ignore.
                ccache(1) can otherwise be trouble.

        debug   desired debug level

        debug_out open file to send debug output to (sys.stderr)

        """

        if conf is None:
            conf = {}

        self.name = name
        self.debug = conf.get('debug', 0)
        self.debug_out = conf.get('debug_out', sys.stderr)

        self.machine = conf.get('MACHINE', '')
        self.machine_arch = conf.get('MACHINE_ARCH', '')
        self.target_spec = conf.get('TARGET_SPEC', '')
        self.curdir = conf.get('CURDIR')
        self.reldir = conf.get('RELDIR')
        self.dpdeps = conf.get('DPDEPS')
        self.line = 0

        if not self.conf:
            # some of the steps below we want to do only once
            # (the membership tests keep the shared srctops and
            # objroots lists free of duplicates if we do get here again)
            self.conf = conf
            self.host_target = conf.get('HOST_TARGET')
            for srctop in conf.get('SRCTOPS', []):
                if not srctop:
                    continue            # nothing usable
                if srctop[-1] != '/':
                    srctop += '/'
                if not srctop in self.srctops:
                    self.srctops.append(srctop)
                _srctop = os.path.realpath(srctop)
                if _srctop[-1] != '/':
                    _srctop += '/'
                if not _srctop in self.srctops:
                    self.srctops.append(_srctop)

            # an empty machine (or missing host_target) must not
            # contribute trims: '' is a suffix of everything and
            # objroot[0:-0] would leave us with an empty objroot.
            trim_list = []
            if self.machine:
                trim_list += add_trims(self.machine)
            if self.machine == 'host' and self.host_target:
                trim_list += add_trims(self.host_target)
            if self.target_spec:
                trim_list += add_trims(self.target_spec)

            for objroot in conf.get('OBJROOTS', []):
                for e in trim_list:
                    if objroot.endswith(e):
                        # this is not what we want - fix it
                        objroot = objroot[0:-len(e)]

                if not objroot:
                    continue            # nothing usable
                if objroot[-1] != '/':
                    objroot += '/'
                if not objroot in self.objroots:
                    self.objroots.append(objroot)
                    _objroot = os.path.realpath(objroot)
                    if _objroot[-1] != '/':
                        _objroot += '/'
                    if not _objroot in self.objroots:
                        self.objroots.append(_objroot)

            # we want the longest match
            self.srctops.sort(reverse=True)
            self.objroots.sort(reverse=True)

            self.excludes = conf.get('EXCLUDES', [])

            if self.debug:
                print("host_target=", self.host_target, file=self.debug_out)
                print("srctops=", self.srctops, file=self.debug_out)
                print("objroots=", self.objroots, file=self.debug_out)
                print("excludes=", self.excludes, file=self.debug_out)

            self.dirdep_re = re.compile(r'([^/]+)/(.+)')

        if self.dpdeps and not self.reldir:
            if self.debug:
                print("need reldir:", end=' ', file=self.debug_out)
            if self.curdir:
                srctop = self.find_top(self.curdir, self.srctops)
                if srctop:
                    self.reldir = self.curdir.replace(srctop,'')
                    if self.debug:
                        print(self.reldir, file=self.debug_out)
            if not self.reldir:
                self.dpdeps = None      # we cannot do it?

        self.cwd = os.getcwd()          # make sure this is initialized
        self.last_dir = self.cwd

        if name:
            self.try_parse()

    def reset(self):
        """reset state if we are being passed meta files from multiple directories."""
        self.seen = {}
        self.obj_deps = []
        self.src_deps = []
        self.file_deps = []

    def dirdeps(self, sep='\n'):
        """return DIRDEPS"""
        return sep.strip() + sep.join(self.obj_deps)

    def src_dirdeps(self, sep='\n'):
        """return SRC_DIRDEPS"""
        return sep.strip() + sep.join(self.src_deps)

    def file_depends(self, out=None):
        """Append DPDEPS_${file} += ${RELDIR}
        for each file we saw, to the output file.
        Does nothing (returns None) if RELDIR is not known."""
        if not self.reldir:
            return None
        for f in sort_unique(self.file_deps):
            print('DPDEPS_%s += %s' % (f, self.reldir), file=out)
        # these entries provide for reverse DIRDEPS lookup
        for f in self.obj_deps:
            print('DEPDIRS_%s += %s' % (f, self.reldir), file=out)

    def seenit(self, dir):
        """remember that we have seen dir."""
        self.seen[dir] = 1

    def add(self, list, data, clue=''):
        """add data to list if it isn't already there.
        clue is only used to label the debug output."""
        if data not in list:
            list.append(data)
            if self.debug:
                print("%s: %sAdd: %s" % (self.name, clue, data), file=self.debug_out)

    def find_top(self, path, list):
        """the logical tree may be split across multiple trees

        return the first entry of list that path starts with, or None."""
        for top in list:
            if path.startswith(top):
                if self.debug > 2:
                    print("found in", top, file=self.debug_out)
                return top
        return None

    def find_obj(self, objroot, dir, path, input):
        """return path within objroot, taking care of .dirdep files

        objroot is the matching entry of objroots, dir the directory
        holding path and input the raw path from the meta file.
        Returns None if no dependency can be deduced."""
        ddep = None
        for ddepf in [path + '.dirdep', dir + '/.dirdep']:
            if not ddep and os.path.exists(ddepf):
                # only the first line matters
                with open(ddepf, 'r') as f:
                    ddep = f.readline().strip('# \n')
                if self.debug > 1:
                    print("found %s: %s\n" % (ddepf, ddep), file=self.debug_out)
                # drop the qualifier and its separator; an empty
                # machine would match anything and eat a character
                if self.machine and ddep.endswith(self.machine):
                    ddep = ddep[0:-(1+len(self.machine))]
                elif self.target_spec and ddep.endswith(self.target_spec):
                    ddep = ddep[0:-(1+len(self.target_spec))]

        if not ddep:
            # no .dirdeps, so remember that we've seen the raw input
            self.seenit(input)
            self.seenit(dir)
            if self.machine == 'none':
                if dir.startswith(objroot):
                    return dir.replace(objroot,'')
                return None
            # what is left is <machine>/<dirdep>
            m = self.dirdep_re.match(dir.replace(objroot,''))
            if m:
                ddep = m.group(2)
                dmachine = m.group(1)
                if dmachine != self.machine:
                    if not (self.machine == 'host' and
                            dmachine == self.host_target):
                        if self.debug > 2:
                            print("adding .%s to %s" % (dmachine, ddep), file=self.debug_out)
                        ddep += '.' + dmachine

        return ddep

    def try_parse(self, name=None, file=None):
        """give file and line number causing exception"""
        try:
            self.parse(name, file)
        except:
            # give a useful clue
            # (catching everything is ok, we always re-raise)
            print('{}:{}: '.format(self.name, self.line), end=' ', file=sys.stderr)
            raise

    def parse(self, name=None, file=None):
        """A meta file looks like:

        # Meta data file "path"
        CMD "command-line"
        CWD "cwd"
        TARGET "target"
        -- command output --
        -- filemon acquired metadata --
        # buildmon version 3
        V 3
        C "pid" "cwd"
        E "pid" "path"
        F "pid" "child"
        R "pid" "path"
        W "pid" "path"
        X "pid" "status"
        D "pid" "path"
        L "pid" "src" "target"
        M "pid" "old" "new"
        S "pid" "path"
        # Bye bye

        We go to some effort to avoid processing a dependency more than once.
        Of the above record types only C,E,F,L,R,V and W are of interest.

        name, if set, replaces self.name.
        file, if set, is iterated over for the lines, otherwise
        self.name is opened (and closed again when we are done).
        An AssertionError is raised if no 'V' record was seen.
        """

        version = 0                     # unknown
        if name:
            self.name = name
        if file:
            f = file
            cwd = self.last_dir = self.cwd
        else:
            f = open(self.name, 'r')
            cwd = self.cwd              # until we learn better
        try:
            skip = True
            pid_cwd = {}
            pid_last_dir = {}
            last_pid = 0

            self.line = 0
            if self.curdir:
                self.seenit(self.curdir)    # we ignore this

            interesting = 'CEFLRV'
            for line in f:
                self.line += 1
                # ignore anything we don't care about
                if not line or line[0] not in interesting:
                    continue
                if self.debug > 2:
                    print("input:", line, end=' ', file=self.debug_out)
                w = line.split()

                if skip:
                    if w[0] == 'V':
                        skip = False
                        version = int(w[1])
                        # disabled:
                        # if version < 4:
                        #     # we cannot ignore 'W' records
                        #     # as they may be 'rw'
                        #     interesting += 'W'
                    elif w[0] == 'CWD':
                        self.cwd = cwd = self.last_dir = w[1]
                        self.seenit(cwd)    # ignore this
                        if self.debug:
                            print("%s: CWD=%s" % (self.name, cwd), file=self.debug_out)
                    continue

                pid = int(w[1])
                if pid != last_pid:
                    # switch to the state we track for this process
                    if last_pid:
                        pid_last_dir[last_pid] = self.last_dir
                    cwd = pid_cwd.get(pid, self.cwd)
                    self.last_dir = pid_last_dir.get(pid, self.cwd)
                    last_pid = pid

                # process operations
                if w[0] == 'F':
                    # the child starts out where its parent is
                    npid = int(w[2])
                    pid_cwd[npid] = cwd
                    pid_last_dir[npid] = cwd
                    last_pid = npid
                    continue
                elif w[0] == 'C':
                    cwd = abspath(w[2], cwd, None, self.debug, self.debug_out)
                    if not cwd:
                        cwd = w[2]
                        if self.debug > 1:
                            print("missing cwd=", cwd, file=self.debug_out)
                    if cwd.endswith('/.'):
                        cwd = cwd[0:-2]
                    self.last_dir = pid_last_dir[pid] = cwd
                    pid_cwd[pid] = cwd
                    if self.debug > 1:
                        print("cwd=", cwd, file=self.debug_out)
                    continue

                if w[2] in self.seen:
                    if self.debug > 2:
                        print("seen:", w[2], file=self.debug_out)
                    continue
                # file operations
                if w[0] in 'ML':
                    # these are special, treat src as read and
                    # target as write
                    self.parse_path(w[2].strip("'"), cwd, 'R', w)
                    self.parse_path(w[3].strip("'"), cwd, 'W', w)
                    continue
                elif w[0] in 'ERWS':
                    path = w[2]
                    if path == '.':
                        continue
                    self.parse_path(path, cwd, w[0], w)
        finally:
            # only close what we opened, even if we are bailing out
            if not file:
                f.close()

        assert(version > 0)

    def is_src(self, base, dir, rdir):
        """is base in srctop

        dir and then rdir (either may be empty) are tried as the
        directory of base.  On the first one found below a srctop the
        directory is added to src_deps (and the file to file_deps if
        DPDEPS are wanted) and True is returned."""
        for dir in [dir,rdir]:
            if not dir:
                continue
            path = '/'.join([dir,base])
            srctop = self.find_top(path, self.srctops)
            if srctop:
                if self.dpdeps:
                    self.add(self.file_deps, path.replace(srctop,''), 'file')
                self.add(self.src_deps, dir.replace(srctop,''), 'src')
                self.seenit(dir)
                return True
        return False

    def parse_path(self, path, cwd, op=None, w=None):
        """look at a path for the op specified

        path is the path from the record, cwd the current directory
        of the process concerned, w the split record itself and
        op the operation to treat it as (default w[0])."""

        if w is None:
            w = []
        if not op:
            op = w[0]

        # we are never interested in .dirdep files as dependencies
        if path.endswith('.dirdep'):
            return
        for p in self.excludes:
            if p and path.startswith(p):
                if self.debug > 2:
                    print("exclude:", p, path, file=self.debug_out)
                return
        # we don't want to resolve the last component if it is
        # a symlink
        path = resolve(path, cwd, self.last_dir, self.debug, self.debug_out)
        if not path:
            return
        dir,base = os.path.split(path)
        if dir in self.seen:
            if self.debug > 2:
                print("seen:", dir, file=self.debug_out)
            return
        # we can have a path in an objdir which is a link
        # to the src dir, we may need to add dependencies for each
        rdir = dir
        dir = abspath(dir, cwd, self.last_dir, self.debug, self.debug_out)
        if dir:
            rdir = os.path.realpath(dir)
        else:
            dir = rdir
        if rdir == dir:
            rdir = None
        # now put path back together
        path = '/'.join([dir,base])
        if self.debug > 1:
            print("raw=%s rdir=%s dir=%s path=%s" % (w[2], rdir, dir, path), file=self.debug_out)
        if op in 'RWS':
            if path in [self.last_dir, cwd, self.cwd, self.curdir]:
                if self.debug > 1:
                    print("skipping:", path, file=self.debug_out)
                return
            if os.path.isdir(path):
                # a directory is only a clue for later relative paths
                if op in 'RW':
                    self.last_dir = path
                if self.debug > 1:
                    print("ldir=", self.last_dir, file=self.debug_out)
                return

        if op in 'ER':
            # finally, we get down to it
            if dir == self.cwd or dir == self.curdir:
                return
            if self.is_src(base, dir, rdir):
                self.seenit(w[2])
                if not rdir:
                    return

            objroot = None
            for dir in [dir,rdir]:
                if not dir:
                    continue
                objroot = self.find_top(dir, self.objroots)
                if objroot:
                    break
            if objroot:
                ddep = self.find_obj(objroot, dir, path, w[2])
                if ddep:
                    self.add(self.obj_deps, ddep, 'obj')
                    if self.dpdeps and objroot.endswith('/stage/'):
                        sp = '/'.join(path.replace(objroot,'').split('/')[1:])
                        self.add(self.file_deps, sp, 'file')
            else:
                # don't waste time looking again
                self.seenit(w[2])
                self.seenit(dir)
606
607
def main(argv, klass=MetaFile, xopts='', xoptf=None):
    """Simple driver for class MetaFile.

    Usage:
        script [options] [key=value ...] "meta" ...

    Options and key=value pairs contribute to the
    dictionary passed to MetaFile.

    -S "SRCTOP"
                add "SRCTOP" to the "SRCTOPS" list.

    -C "CURDIR"

    -O "OBJROOT"
                add "OBJROOT" to the "OBJROOTS" list.

    -R "RELDIR"

    -m "MACHINE"

    -a "MACHINE_ARCH"

    -H "HOST_TARGET"

    -T "TARGET_SPEC"

    -D "DPDEPS"

    -X "EXCLUDE"
                add "EXCLUDE" to the "EXCLUDES" list.

    -q  do not print the results

    -d  bumps debug level

    MACHINE, MACHINE_ARCH, SB_SRC and SB_OBJROOT from the environment
    provide initial values.

    A "meta" argument is either a file ending in '.meta' or
    '@file' where file lists meta files.  Files that do not exist
    are skipped.

    klass is the class to instantiate for each meta file.
    xopts are additional getopt(3) option letters, these are
    passed to xoptf(option, value, conf).

    Returns the last klass instance created (None if there were none).
    """
    import getopt

    # import Psyco if we can
    # it can speed things up quite a bit
    have_psyco = 0
    try:
        import psyco
        psyco.full()
        have_psyco = 1
    except Exception:
        pass

    conf = {
        'SRCTOPS': [],
        'OBJROOTS': [],
        'EXCLUDES': [],
        }

    # each of these is optional,
    # a missing one should not stop us looking at the others
    machine = os.environ.get('MACHINE')
    if machine:
        conf['MACHINE'] = machine
    machine_arch = os.environ.get('MACHINE_ARCH')
    if machine_arch:
        conf['MACHINE_ARCH'] = machine_arch
    srctop = os.environ.get('SB_SRC')
    if srctop:
        conf['SRCTOPS'].append(srctop)
    objroot = os.environ.get('SB_OBJROOT')
    if objroot:
        conf['OBJROOTS'].append(objroot)

    debug = 0
    output = True

    opts, args = getopt.getopt(argv[1:], 'a:dS:C:O:R:m:D:H:qT:X:' + xopts)
    for o, a in opts:
        if o == '-a':
            conf['MACHINE_ARCH'] = a
        elif o == '-d':
            debug += 1
        elif o == '-q':
            output = False
        elif o == '-H':
            conf['HOST_TARGET'] = a
        elif o == '-S':
            if a not in conf['SRCTOPS']:
                conf['SRCTOPS'].append(a)
        elif o == '-C':
            conf['CURDIR'] = a
        elif o == '-O':
            if a not in conf['OBJROOTS']:
                conf['OBJROOTS'].append(a)
        elif o == '-R':
            conf['RELDIR'] = a
        elif o == '-D':
            conf['DPDEPS'] = a
        elif o == '-m':
            conf['MACHINE'] = a
        elif o == '-T':
            conf['TARGET_SPEC'] = a
        elif o == '-X':
            if a not in conf['EXCLUDES']:
                conf['EXCLUDES'].append(a)
        elif xoptf:
            xoptf(o, a, conf)

    conf['debug'] = debug

    # get any var=val assignments
    # they must all come before the first meta file
    nassign = 0
    for a in args:
        if a.find('=') <= 0:
            break
        # the value may itself contain '='
        k, v = a.split('=', 1)
        if k in ['SRCTOP','OBJROOT','SRCTOPS','OBJROOTS']:
            if k == 'SRCTOP':
                k = 'SRCTOPS'
            elif k == 'OBJROOT':
                k = 'OBJROOTS'
            if v not in conf[k]:
                conf[k].append(v)
        else:
            conf[k] = v
        nassign += 1

    args = args[nassign:]

    debug_out = conf.get('debug_out', sys.stderr)

    if debug:
        print("config:", file=debug_out)
        print("psyco=", have_psyco, file=debug_out)
        for k, v in conf.items():
            print("%s=%s" % (k,v), file=debug_out)

    m = None
    for a in args:
        if a.endswith('.meta'):
            if not os.path.exists(a):
                continue
            m = klass(a, conf)
        elif a.startswith('@'):
            # there can actually multiple files per line
            with open(a[1:]) as flist:
                for line in flist:
                    for f in line.strip().split():
                        if not os.path.exists(f):
                            continue
                        m = klass(f, conf)

    if output and m:
        print(m.dirdeps())

        print(m.src_dirdeps('\nsrc:'))

        dpdeps = conf.get('DPDEPS')
        if dpdeps:
            with open(dpdeps, 'w') as out:
                m.file_depends(out)

    return m
760
if __name__ == '__main__':
    # Run the driver.  Whatever goes wrong is announced and then
    # re-raised so that the traceback still follows.
    try:
        main(sys.argv)
    except:
        err = sys.exc_info()[1]
        # yes, this goes to stdout
        print("ERROR: ", err)
        raise
768
769