xref: /freebsd/share/mk/meta2deps.py (revision 90b5fc95832da64a5f56295e687379732c33718f)
1#!/usr/bin/env python
2
3from __future__ import print_function
4
5"""
6This script parses each "meta" file and extracts the
7information needed to deduce build and src dependencies.
8
9It works much the same as the original shell script, but is
10*much* more efficient.
11
12The parsing work is handled by the class MetaFile.
13We only pay attention to a subset of the information in the
14"meta" files.  Specifically:
15
'CWD'	to initialize our notion of the current working directory.
17
18'C'	to track chdir(2) on a per process basis
19
20'R'	files read are what we really care about.
21	directories read, provide a clue to resolving
22	subsequent relative paths.  That is if we cannot find
23	them relative to 'cwd', we check relative to the last
24	dir read.
25
26'W'	files opened for write or read-write,
27	for filemon V3 and earlier.
28
29'E'	files executed.
30
31'L'	files linked
32
33'V'	the filemon version, this record is used as a clue
34	that we have reached the interesting bit.
35
36"""
37
38"""
39RCSid:
40	$FreeBSD$
41	$Id: meta2deps.py,v 1.34 2020/10/02 03:11:17 sjg Exp $
42
43	Copyright (c) 2011-2020, Simon J. Gerraty
44	Copyright (c) 2011-2017, Juniper Networks, Inc.
45	All rights reserved.
46
47	Redistribution and use in source and binary forms, with or without
48	modification, are permitted provided that the following conditions
49	are met:
50	1. Redistributions of source code must retain the above copyright
51	   notice, this list of conditions and the following disclaimer.
52	2. Redistributions in binary form must reproduce the above copyright
53	   notice, this list of conditions and the following disclaimer in the
54	   documentation and/or other materials provided with the distribution.
55
56	THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
57	"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
58	LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
59	A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
60	OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
61	SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
62	LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
63	DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
64	THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
65	(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
66	OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
67
68"""
69
70import os, re, sys
71
def getv(dict, key, d=None):
    """Return dict[key] when key is present, otherwise the fallback d."""
    return dict[key] if key in dict else d
77
def resolve(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Work out an absolute path for 'path', or return None if we cannot.

    An absolute path is returned as is, provided it exists.
    '.' and './...' are taken relative to cwd without checking existence.
    '..' yields the parent of last_dir (when usable) or else of cwd.
    Anything else is looked for under last_dir, then under cwd, and the
    first candidate that exists wins.

    Progress is reported on debug_out when debug > 2.
    """
    verbose = debug > 2

    if path.endswith('/.'):
        path = path[:-2]

    if path[:1] == '/':
        if os.path.exists(path):
            return path
        if verbose:
            print("skipping non-existent:", path, file=debug_out)
        return None

    if path == '.':
        return cwd
    if path.startswith('./'):
        return cwd + path[1:]

    # last_dir is only worth a separate look when it differs from cwd
    hint = None if last_dir == cwd else last_dir
    for base in (hint, cwd):
        if not base:
            continue
        if path == '..':
            # everything before the final '/', or the root if nothing is left
            return base.rpartition('/')[0] or '/'
        candidate = base + '/' + path
        if verbose:
            print("looking for:", candidate, end=' ', file=debug_out)
        if os.path.exists(candidate):
            if verbose:
                print("found:", candidate, file=debug_out)
            return candidate
        if verbose:
            print("nope", file=debug_out)
    return None
117
def cleanpath(path):
    """
    Tidy up path textually, without consulting the filesystem.

    Empty and '.' components are dropped and each '..' cancels the
    component before it.  A '..' with nothing left to cancel ends the
    scan, so whatever follows it is discarded.
    A leading '/' is preserved.
    """
    prefix = '/' if path.startswith('/') else ''
    kept = []
    for part in path.split('/'):
        if part in ('', '.'):
            continue
        if part != '..':
            kept.append(part)
        elif kept:
            kept.pop()
        else:
            # nothing left to back out of: give up on the remainder
            break
    return prefix + '/'.join(kept)
138
def abspath(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Like resolve(), but also tidy the result with cleanpath() when it
    looks like it needs it.

    Returns None for an absolute path that resolve() rejects; a relative
    path that could not be resolved is kept (and possibly cleaned).
    This is called a lot, which is why realpath is avoided here.
    """
    resolved = resolve(path, cwd, last_dir, debug, debug_out)
    if resolved:
        path = resolved
    elif path[:1] == '/':
        return None

    needs_cleaning = ('/' not in path or
                      path.find('./') > 0 or
                      path.endswith('/..'))
    return cleanpath(path) if needs_cleaning else path
154
def sort_unique(list, cmp=None, key=None, reverse=False):
    """
    Sort list in place and return a new list without adjacent duplicates.

    list     the list to sort; it is left sorted as a side effect.
    cmp      optional old-style comparison function taking two arguments.
    key      optional function extracting a sort key from each element.
    reverse  sort in descending order when true.

    Python 3's list.sort() accepts neither positional arguments nor 'cmp',
    so a cmp function is adapted with functools.cmp_to_key.  When both cmp
    and key are supplied, cmp compares the extracted keys, as Python 2 did.
    """
    from functools import cmp_to_key

    sort_key = key
    if cmp is not None:
        cmp_key = cmp_to_key(cmp)
        if key is None:
            sort_key = cmp_key
        else:
            sort_key = lambda e: cmp_key(key(e))
    list.sort(key=sort_key, reverse=reverse)

    unique = []
    for e in list:
        # compare against the last element kept, not a None sentinel,
        # so that a leading None in the data is not silently dropped
        if unique and e == unique[-1]:
            continue
        unique.append(e)
    return unique
165
def add_trims(x):
    """
    Return the four spellings of x used for trimming suffixes:
    '/x/', '/x', 'x/' and 'x', in that order.
    """
    wrappers = (('/', '/'), ('/', ''), ('', '/'), ('', ''))
    return [lead + x + trail for lead, trail in wrappers]
171
class MetaFile:
    """class to parse meta files generated by bmake."""

    # NOTE: these are class attributes.  The lists and the dict are
    # modified in place through 'self' (append / item assignment), so
    # their contents are shared by every instance until reset() rebinds
    # them on a particular instance.

    # configuration dict; __init__ stores the one it was given on the instance
    conf = None
    # compiled in __init__; splits '<first component>/<rest>'
    dirdep_re = None
    # value of conf['HOST_TARGET'], if any
    host_target = None
    # tops of the src tree(s), each ending in '/', reverse sorted
    srctops = []
    # prefixes of the obj trees, each ending in '/', reverse sorted
    objroots = []
    # path prefixes to ignore
    excludes = []
    # paths and directories we have already dealt with
    seen = {}
    # dependencies found under an objroot (DIRDEPS)
    obj_deps = []
    # directories found under a srctop (SRC_DIRDEPS)
    src_deps = []
    # individual files, only collected when DPDEPS is wanted
    file_deps = []
185
186    def __init__(self, name, conf={}):
187        """if name is set we will parse it now.
188        conf can have the follwing keys:
189
190        SRCTOPS list of tops of the src tree(s).
191
192        CURDIR  the src directory 'bmake' was run from.
193
194        RELDIR  the relative path from SRCTOP to CURDIR
195
196        MACHINE the machine we built for.
197                set to 'none' if we are not cross-building.
198                More specifically if machine cannot be deduced from objdirs.
199
200        TARGET_SPEC
201                Sometimes MACHINE isn't enough.
202
203        HOST_TARGET
204                when we build for the pseudo machine 'host'
205                the object tree uses HOST_TARGET rather than MACHINE.
206
207        OBJROOTS a list of the common prefix for all obj dirs it might
208                end in '/' or '-'.
209
210        DPDEPS  names an optional file to which per file dependencies
211                will be appended.
212                For example if 'some/path/foo.h' is read from SRCTOP
213                then 'DPDEPS_some/path/foo.h +=' "RELDIR" is output.
214                This can allow 'bmake' to learn all the dirs within
215                the tree that depend on 'foo.h'
216
217        EXCLUDES
218                A list of paths to ignore.
219                ccache(1) can otherwise be trouble.
220
221        debug   desired debug level
222
223        debug_out open file to send debug output to (sys.stderr)
224
225        """
226
227        self.name = name
228        self.debug = getv(conf, 'debug', 0)
229        self.debug_out = getv(conf, 'debug_out', sys.stderr)
230
231        self.machine = getv(conf, 'MACHINE', '')
232        self.machine_arch = getv(conf, 'MACHINE_ARCH', '')
233        self.target_spec = getv(conf, 'TARGET_SPEC', '')
234        self.curdir = getv(conf, 'CURDIR')
235        self.reldir = getv(conf, 'RELDIR')
236        self.dpdeps = getv(conf, 'DPDEPS')
237        self.line = 0
238
239        if not self.conf:
240            # some of the steps below we want to do only once
241            self.conf = conf
242            self.host_target = getv(conf, 'HOST_TARGET')
243            for srctop in getv(conf, 'SRCTOPS', []):
244                if srctop[-1] != '/':
245                    srctop += '/'
246                if not srctop in self.srctops:
247                    self.srctops.append(srctop)
248                _srctop = os.path.realpath(srctop)
249                if _srctop[-1] != '/':
250                    _srctop += '/'
251                if not _srctop in self.srctops:
252                    self.srctops.append(_srctop)
253
254            trim_list = add_trims(self.machine)
255            if self.machine == 'host':
256                trim_list += add_trims(self.host_target)
257            if self.target_spec:
258                trim_list += add_trims(self.target_spec)
259
260            for objroot in getv(conf, 'OBJROOTS', []):
261                for e in trim_list:
262                    if objroot.endswith(e):
263                        # this is not what we want - fix it
264                        objroot = objroot[0:-len(e)]
265
266                if objroot[-1] != '/':
267                    objroot += '/'
268                if not objroot in self.objroots:
269                    self.objroots.append(objroot)
270                    _objroot = os.path.realpath(objroot)
271                    if objroot[-1] == '/':
272                        _objroot += '/'
273                    if not _objroot in self.objroots:
274                        self.objroots.append(_objroot)
275
276            # we want the longest match
277            self.srctops.sort(reverse=True)
278            self.objroots.sort(reverse=True)
279
280            self.excludes = getv(conf, 'EXCLUDES', [])
281
282            if self.debug:
283                print("host_target=", self.host_target, file=self.debug_out)
284                print("srctops=", self.srctops, file=self.debug_out)
285                print("objroots=", self.objroots, file=self.debug_out)
286                print("excludes=", self.excludes, file=self.debug_out)
287
288            self.dirdep_re = re.compile(r'([^/]+)/(.+)')
289
290        if self.dpdeps and not self.reldir:
291            if self.debug:
292                print("need reldir:", end=' ', file=self.debug_out)
293            if self.curdir:
294                srctop = self.find_top(self.curdir, self.srctops)
295                if srctop:
296                    self.reldir = self.curdir.replace(srctop,'')
297                    if self.debug:
298                        print(self.reldir, file=self.debug_out)
299            if not self.reldir:
300                self.dpdeps = None      # we cannot do it?
301
302        self.cwd = os.getcwd()          # make sure this is initialized
303        self.last_dir = self.cwd
304
305        if name:
306            self.try_parse()
307
308    def reset(self):
309        """reset state if we are being passed meta files from multiple directories."""
310        self.seen = {}
311        self.obj_deps = []
312        self.src_deps = []
313        self.file_deps = []
314
315    def dirdeps(self, sep='\n'):
316        """return DIRDEPS"""
317        return sep.strip() + sep.join(self.obj_deps)
318
319    def src_dirdeps(self, sep='\n'):
320        """return SRC_DIRDEPS"""
321        return sep.strip() + sep.join(self.src_deps)
322
323    def file_depends(self, out=None):
324        """Append DPDEPS_${file} += ${RELDIR}
325        for each file we saw, to the output file."""
326        if not self.reldir:
327            return None
328        for f in sort_unique(self.file_deps):
329            print('DPDEPS_%s += %s' % (f, self.reldir), file=out)
330        # these entries provide for reverse DIRDEPS lookup
331        for f in self.obj_deps:
332            print('DEPDIRS_%s += %s' % (f, self.reldir), file=out)
333
334    def seenit(self, dir):
335        """rememer that we have seen dir."""
336        self.seen[dir] = 1
337
338    def add(self, list, data, clue=''):
339        """add data to list if it isn't already there."""
340        if data not in list:
341            list.append(data)
342            if self.debug:
343                print("%s: %sAdd: %s" % (self.name, clue, data), file=self.debug_out)
344
345    def find_top(self, path, list):
346        """the logical tree may be split across multiple trees"""
347        for top in list:
348            if path.startswith(top):
349                if self.debug > 2:
350                    print("found in", top, file=self.debug_out)
351                return top
352        return None
353
354    def find_obj(self, objroot, dir, path, input):
355        """return path within objroot, taking care of .dirdep files"""
356        ddep = None
357        for ddepf in [path + '.dirdep', dir + '/.dirdep']:
358            if not ddep and os.path.exists(ddepf):
359                ddep = open(ddepf, 'r').readline().strip('# \n')
360                if self.debug > 1:
361                    print("found %s: %s\n" % (ddepf, ddep), file=self.debug_out)
362                if ddep.endswith(self.machine):
363                    ddep = ddep[0:-(1+len(self.machine))]
364                elif self.target_spec and ddep.endswith(self.target_spec):
365                    ddep = ddep[0:-(1+len(self.target_spec))]
366
367        if not ddep:
368            # no .dirdeps, so remember that we've seen the raw input
369            self.seenit(input)
370            self.seenit(dir)
371            if self.machine == 'none':
372                if dir.startswith(objroot):
373                    return dir.replace(objroot,'')
374                return None
375            m = self.dirdep_re.match(dir.replace(objroot,''))
376            if m:
377                ddep = m.group(2)
378                dmachine = m.group(1)
379                if dmachine != self.machine:
380                    if not (self.machine == 'host' and
381                            dmachine == self.host_target):
382                        if self.debug > 2:
383                            print("adding .%s to %s" % (dmachine, ddep), file=self.debug_out)
384                        ddep += '.' + dmachine
385
386        return ddep
387
388    def try_parse(self, name=None, file=None):
389        """give file and line number causing exception"""
390        try:
391            self.parse(name, file)
392        except:
393            # give a useful clue
394            print('{}:{}: '.format(self.name, self.line), end=' ', file=sys.stderr)
395            raise
396
397    def parse(self, name=None, file=None):
398        """A meta file looks like:
399
400        # Meta data file "path"
401        CMD "command-line"
402        CWD "cwd"
403        TARGET "target"
404        -- command output --
405        -- filemon acquired metadata --
406        # buildmon version 3
407        V 3
408        C "pid" "cwd"
409        E "pid" "path"
410        F "pid" "child"
411        R "pid" "path"
412        W "pid" "path"
413        X "pid" "status"
414        D "pid" "path"
415        L "pid" "src" "target"
416        M "pid" "old" "new"
417        S "pid" "path"
418        # Bye bye
419
420        We go to some effort to avoid processing a dependency more than once.
421        Of the above record types only C,E,F,L,R,V and W are of interest.
422        """
423
424        version = 0                     # unknown
425        if name:
426            self.name = name;
427        if file:
428            f = file
429            cwd = self.last_dir = self.cwd
430        else:
431            f = open(self.name, 'r')
432        skip = True
433        pid_cwd = {}
434        pid_last_dir = {}
435        last_pid = 0
436
437        self.line = 0
438        if self.curdir:
439            self.seenit(self.curdir)    # we ignore this
440
441        interesting = 'CEFLRV'
442        for line in f:
443            self.line += 1
444            # ignore anything we don't care about
445            if not line[0] in interesting:
446                continue
447            if self.debug > 2:
448                print("input:", line, end=' ', file=self.debug_out)
449            w = line.split()
450
451            if skip:
452                if w[0] == 'V':
453                    skip = False
454                    version = int(w[1])
455                    """
456                    if version < 4:
457                        # we cannot ignore 'W' records
458                        # as they may be 'rw'
459                        interesting += 'W'
460                    """
461                elif w[0] == 'CWD':
462                    self.cwd = cwd = self.last_dir = w[1]
463                    self.seenit(cwd)    # ignore this
464                    if self.debug:
465                        print("%s: CWD=%s" % (self.name, cwd), file=self.debug_out)
466                continue
467
468            pid = int(w[1])
469            if pid != last_pid:
470                if last_pid:
471                    pid_last_dir[last_pid] = self.last_dir
472                cwd = getv(pid_cwd, pid, self.cwd)
473                self.last_dir = getv(pid_last_dir, pid, self.cwd)
474                last_pid = pid
475
476            # process operations
477            if w[0] == 'F':
478                npid = int(w[2])
479                pid_cwd[npid] = cwd
480                pid_last_dir[npid] = cwd
481                last_pid = npid
482                continue
483            elif w[0] == 'C':
484                cwd = abspath(w[2], cwd, None, self.debug, self.debug_out)
485                if not cwd:
486                    cwd = w[2]
487                    if self.debug > 1:
488                        print("missing cwd=", cwd, file=self.debug_out)
489                if cwd.endswith('/.'):
490                    cwd = cwd[0:-2]
491                self.last_dir = pid_last_dir[pid] = cwd
492                pid_cwd[pid] = cwd
493                if self.debug > 1:
494                    print("cwd=", cwd, file=self.debug_out)
495                continue
496
497            if w[2] in self.seen:
498                if self.debug > 2:
499                    print("seen:", w[2], file=self.debug_out)
500                continue
501            # file operations
502            if w[0] in 'ML':
503                # these are special, tread src as read and
504                # target as write
505                self.parse_path(w[2].strip("'"), cwd, 'R', w)
506                self.parse_path(w[3].strip("'"), cwd, 'W', w)
507                continue
508            elif w[0] in 'ERWS':
509                path = w[2]
510                if path == '.':
511                    continue
512                self.parse_path(path, cwd, w[0], w)
513
514        assert(version > 0)
515        if not file:
516            f.close()
517
518    def is_src(self, base, dir, rdir):
519        """is base in srctop"""
520        for dir in [dir,rdir]:
521            if not dir:
522                continue
523            path = '/'.join([dir,base])
524            srctop = self.find_top(path, self.srctops)
525            if srctop:
526                if self.dpdeps:
527                    self.add(self.file_deps, path.replace(srctop,''), 'file')
528                self.add(self.src_deps, dir.replace(srctop,''), 'src')
529                self.seenit(dir)
530                return True
531        return False
532
533    def parse_path(self, path, cwd, op=None, w=[]):
534        """look at a path for the op specified"""
535
536        if not op:
537            op = w[0]
538
539        # we are never interested in .dirdep files as dependencies
540        if path.endswith('.dirdep'):
541            return
542        for p in self.excludes:
543            if p and path.startswith(p):
544                if self.debug > 2:
545                    print("exclude:", p, path, file=self.debug_out)
546                return
547        # we don't want to resolve the last component if it is
548        # a symlink
549        path = resolve(path, cwd, self.last_dir, self.debug, self.debug_out)
550        if not path:
551            return
552        dir,base = os.path.split(path)
553        if dir in self.seen:
554            if self.debug > 2:
555                print("seen:", dir, file=self.debug_out)
556            return
557        # we can have a path in an objdir which is a link
558        # to the src dir, we may need to add dependencies for each
559        rdir = dir
560        dir = abspath(dir, cwd, self.last_dir, self.debug, self.debug_out)
561        rdir = os.path.realpath(dir)
562        if rdir == dir:
563            rdir = None
564        # now put path back together
565        path = '/'.join([dir,base])
566        if self.debug > 1:
567            print("raw=%s rdir=%s dir=%s path=%s" % (w[2], rdir, dir, path), file=self.debug_out)
568        if op in 'RWS':
569            if path in [self.last_dir, cwd, self.cwd, self.curdir]:
570                if self.debug > 1:
571                    print("skipping:", path, file=self.debug_out)
572                return
573            if os.path.isdir(path):
574                if op in 'RW':
575                    self.last_dir = path;
576                if self.debug > 1:
577                    print("ldir=", self.last_dir, file=self.debug_out)
578                return
579
580        if op in 'ER':
581            # finally, we get down to it
582            if dir == self.cwd or dir == self.curdir:
583                return
584            if self.is_src(base, dir, rdir):
585                self.seenit(w[2])
586                if not rdir:
587                    return
588
589            objroot = None
590            for dir in [dir,rdir]:
591                if not dir:
592                    continue
593                objroot = self.find_top(dir, self.objroots)
594                if objroot:
595                    break
596            if objroot:
597                ddep = self.find_obj(objroot, dir, path, w[2])
598                if ddep:
599                    self.add(self.obj_deps, ddep, 'obj')
600                    if self.dpdeps and objroot.endswith('/stage/'):
601                        sp = '/'.join(path.replace(objroot,'').split('/')[1:])
602                        self.add(self.file_deps, sp, 'file')
603            else:
604                # don't waste time looking again
605                self.seenit(w[2])
606                self.seenit(dir)
607
608
def main(argv, klass=MetaFile, xopts='', xoptf=None):
    """Simple driver for class MetaFile.

    Usage:
        script [options] [key=value ...] "meta" ...

    Options and key=value pairs contribute to the
    dictionary passed to MetaFile.

    -S "SRCTOP"
                add "SRCTOP" to the "SRCTOPS" list.

    -C "CURDIR"

    -O "OBJROOT"
                add "OBJROOT" to the "OBJROOTS" list.

    -R "RELDIR"

    -m "MACHINE"

    -a "MACHINE_ARCH"

    -H "HOST_TARGET"

    -T "TARGET_SPEC"

    -D "DPDEPS"

    -X "EXCLUDE"
                add "EXCLUDE" to the "EXCLUDES" list.

    -d  bumps debug level

    -q  quiet, do not print the results

    MACHINE, MACHINE_ARCH, SB_SRC and SB_OBJROOT are taken from the
    environment, each one independently, when set and not empty.

    Arguments ending in '.meta' are parsed as meta files, '@file'
    names a file listing meta files; ones that do not exist are
    skipped.

    argv    the argument list, argv[0] is ignored.
    klass   the class to instantiate for each meta file.
    xopts   extra getopt option letters; options not handled here
            are handed to xoptf(option, value, conf).

    Returns the last klass instance created, or None.
    """
    import getopt

    # import Psyco if we can
    # it can speed things up quite a bit
    have_psyco = 0
    try:
        import psyco
        psyco.full()
        have_psyco = 1
    except Exception:
        # purely optional, carry on without it
        pass

    conf = {
        'SRCTOPS': [],
        'OBJROOTS': [],
        'EXCLUDES': [],
        }

    # each variable is looked up on its own, so one that is missing
    # does not stop the others from being honoured
    for var in ('MACHINE', 'MACHINE_ARCH'):
        val = os.environ.get(var)
        if val:
            conf[var] = val
    for var, key in (('SB_SRC', 'SRCTOPS'), ('SB_OBJROOT', 'OBJROOTS')):
        val = os.environ.get(var)
        if val:
            conf[key].append(val)

    debug = 0
    output = True

    opts, args = getopt.getopt(argv[1:], 'a:dS:C:O:R:m:D:H:qT:X:' + xopts)
    for o, a in opts:
        if o == '-a':
            conf['MACHINE_ARCH'] = a
        elif o == '-d':
            debug += 1
        elif o == '-q':
            output = False
        elif o == '-H':
            conf['HOST_TARGET'] = a
        elif o == '-S':
            if a not in conf['SRCTOPS']:
                conf['SRCTOPS'].append(a)
        elif o == '-C':
            conf['CURDIR'] = a
        elif o == '-O':
            if a not in conf['OBJROOTS']:
                conf['OBJROOTS'].append(a)
        elif o == '-R':
            conf['RELDIR'] = a
        elif o == '-D':
            conf['DPDEPS'] = a
        elif o == '-m':
            conf['MACHINE'] = a
        elif o == '-T':
            conf['TARGET_SPEC'] = a
        elif o == '-X':
            if a not in conf['EXCLUDES']:
                conf['EXCLUDES'].append(a)
        elif xoptf:
            xoptf(o, a, conf)

    conf['debug'] = debug

    # get any var=val assignments, they must come before the meta files
    eaten = 0
    for a in args:
        if a.find('=') <= 0:
            break
        # split on the first '=' only, the value may contain more
        k, v = a.split('=', 1)
        if k in ['SRCTOP','OBJROOT','SRCTOPS','OBJROOTS']:
            if k == 'SRCTOP':
                k = 'SRCTOPS'
            elif k == 'OBJROOT':
                k = 'OBJROOTS'
            if v not in conf[k]:
                conf[k].append(v)
        else:
            conf[k] = v
        eaten += 1
    del args[:eaten]

    debug_out = getv(conf, 'debug_out', sys.stderr)

    if debug:
        print("config:", file=debug_out)
        print("psyco=", have_psyco, file=debug_out)
        for k, v in list(conf.items()):
            print("%s=%s" % (k,v), file=debug_out)

    m = None
    for a in args:
        if a.endswith('.meta'):
            if not os.path.exists(a):
                continue
            m = klass(a, conf)
        elif a.startswith('@'):
            # there can actually be multiple files per line
            with open(a[1:]) as listing:
                for line in listing:
                    for f in line.strip().split():
                        if not os.path.exists(f):
                            continue
                        m = klass(f, conf)

    if output and m:
        print(m.dirdeps())

        print(m.src_dirdeps('\nsrc:'))

        dpdeps = getv(conf, 'DPDEPS')
        if dpdeps:
            # text mode: file_depends() writes with print()
            with open(dpdeps, 'w') as out:
                m.file_depends(out)

    return m
761
if __name__ == '__main__':
    # run as a script: report whatever went wrong, then let it propagate
    try:
        main(sys.argv)
    except BaseException as err:
        # yes, this goes to stdout
        print("ERROR: ", err)
        raise
769
770