#!/usr/bin/env python

from __future__ import print_function

"""
This script parses each "meta" file and extracts the
information needed to deduce build and src dependencies.

It works much the same as the original shell script, but is
*much* more efficient.

The parsing work is handled by the class MetaFile.
We only pay attention to a subset of the information in the
"meta" files.  Specifically:

'CWD'   to initialize our notion.

'C'     to track chdir(2) on a per process basis

'R'     files read are what we really care about.
        directories read, provide a clue to resolving
        subsequent relative paths.  That is if we cannot find
        them relative to 'cwd', we check relative to the last
        dir read.

'W'     files opened for write or read-write,
        for filemon V3 and earlier.

'E'     files executed.

'L'     files linked

'V'     the filemon version, this record is used as a clue
        that we have reached the interesting bit.

"""

"""
RCSid:
        $FreeBSD$
        $Id: meta2deps.py,v 1.34 2020/10/02 03:11:17 sjg Exp $

        Copyright (c) 2011-2020, Simon J. Gerraty
        Copyright (c) 2011-2017, Juniper Networks, Inc.
        All rights reserved.

        Redistribution and use in source and binary forms, with or without
        modification, are permitted provided that the following conditions
        are met:
        1. Redistributions of source code must retain the above copyright
           notice, this list of conditions and the following disclaimer.
        2. Redistributions in binary form must reproduce the above copyright
           notice, this list of conditions and the following disclaimer in the
           documentation and/or other materials provided with the distribution.

        THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
        "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
        LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
        A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
        OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
        SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
        LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
        DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
        THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
        (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
        OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""

import os
import re
import sys


def getv(dict, key, d=None):
    """Lookup key in dict and return value or the supplied default."""
    if key in dict:
        return dict[key]
    return d


def resolve(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Return an absolute path, resolving via cwd or last_dir if needed.

    Absolute paths are returned only if they exist.
    '.' and './x' are resolved against cwd without an existence check.
    Other relative paths are tried against last_dir and then cwd,
    the first candidate that exists wins.
    Returns None if nothing suitable is found.
    """
    if path.endswith('/.'):
        path = path[0:-2]
    if len(path) > 0 and path[0] == '/':
        if os.path.exists(path):
            return path
        if debug > 2:
            print("skipping non-existent:", path, file=debug_out)
        return None
    if path == '.':
        return cwd
    if path.startswith('./'):
        return cwd + path[1:]
    if last_dir == cwd:
        # no point checking the same directory twice
        last_dir = None
    for d in [last_dir, cwd]:
        if not d:
            continue
        if path == '..':
            # parent of the first usable directory, no existence check
            dw = d.split('/')
            p = '/'.join(dw[:-1])
            if not p:
                p = '/'
            return p
        p = '/'.join([d, path])
        if debug > 2:
            print("looking for:", p, end=' ', file=debug_out)
        if not os.path.exists(p):
            if debug > 2:
                print("nope", file=debug_out)
            p = None
            continue
        if debug > 2:
            print("found:", p, file=debug_out)
        return p
    return None


def cleanpath(path):
    """cleanup path without using realpath(3)

    Empty and '.' components are dropped, '..' removes the previous
    component.  If '..' would climb above the start of the path we
    stop processing and return what we have so far.
    """
    if path.startswith('/'):
        r = '/'
    else:
        r = ''
    p = []
    w = path.split('/')
    for d in w:
        if not d or d == '.':
            continue
        if d == '..':
            try:
                p.pop()
                continue
            except IndexError:
                # nothing left to pop - give up on the rest
                break
        p.append(d)

    return r + '/'.join(p)


def abspath(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Return an absolute path, resolving via cwd or last_dir if needed.
    this gets called a lot, so we try to avoid calling realpath.

    Returns None for an absolute path that resolve() rejects.
    """
    rpath = resolve(path, cwd, last_dir, debug, debug_out)
    if rpath:
        path = rpath
    elif len(path) > 0 and path[0] == '/':
        return None
    if (path.find('/') < 0 or
        path.find('./') > 0 or
        path.endswith('/..')):
        path = cleanpath(path)
    return path


def sort_unique(list, cmp=None, key=None, reverse=False):
    """Sort list in place and return a new list without adjacent duplicates.

    cmp is an optional old-style comparison function (as for
    Python 2 list.sort), key and reverse are as for list.sort.
    """
    if cmp is None:
        list.sort(key=key, reverse=reverse)
    else:
        # Python 3 list.sort() has no cmp argument and takes
        # keyword arguments only, so adapt cmp to a key function.
        from functools import cmp_to_key
        cmp_key = cmp_to_key(cmp)
        if key is None:
            sort_key = cmp_key
        else:
            def sort_key(e, _key=key, _cmp_key=cmp_key):
                return _cmp_key(_key(e))
        list.sort(key=sort_key, reverse=reverse)
    nl = []
    for e in list:
        # compare with the last element kept rather than a None
        # sentinel, so that a leading None is not lost.
        if nl and e == nl[-1]:
            continue
        nl.append(e)
    return nl


def add_trims(x):
    """Return the suffixes derived from x that should be trimmed
    from an objroot.

    An empty (or None) x yields no suffixes; an empty suffix
    would match every objroot and trim it to nothing.
    """
    if not x:
        return []
    return ['/' + x + '/',
            '/' + x,
            x + '/',
            x]


class MetaFile:
    """class to parse meta files generated by bmake."""

    # NOTE: the lists and dict below are class attributes and are
    # mutated in place, so they are shared by all instances until
    # reset() is called on an instance.  main() relies on this to
    # accumulate dependencies over many meta files.
    conf = None
    dirdep_re = None
    host_target = None
    srctops = []
    objroots = []
    excludes = []
    seen = {}
    obj_deps = []
    src_deps = []
    file_deps = []

    def __init__(self, name, conf=None):
        """if name is set we will parse it now.
        conf can have the following keys:

        SRCTOPS list of tops of the src tree(s).

        CURDIR  the src directory 'bmake' was run from.

        RELDIR  the relative path from SRCTOP to CURDIR

        MACHINE the machine we built for.
                set to 'none' if we are not cross-building.
                More specifically if machine cannot be deduced from objdirs.

        MACHINE_ARCH
                recorded as self.machine_arch.

        TARGET_SPEC
                Sometimes MACHINE isn't enough.

        HOST_TARGET
                when we build for the pseudo machine 'host'
                the object tree uses HOST_TARGET rather than MACHINE.

        OBJROOTS a list of the common prefix for all obj dirs it might
                end in '/' or '-'.

        DPDEPS  names an optional file to which per file dependencies
                will be written.
                For example if 'some/path/foo.h' is read from SRCTOP
                then 'DPDEPS_some/path/foo.h +=' "RELDIR" is output.
                This can allow 'bmake' to learn all the dirs within
                the tree that depend on 'foo.h'

        EXCLUDES
                A list of paths to ignore.
                ccache(1) can otherwise be trouble.

        debug   desired debug level

        debug_out open file to send debug output to (sys.stderr)

        """

        if conf is None:
            conf = {}

        self.name = name
        self.debug = getv(conf, 'debug', 0)
        self.debug_out = getv(conf, 'debug_out', sys.stderr)

        self.machine = getv(conf, 'MACHINE', '')
        self.machine_arch = getv(conf, 'MACHINE_ARCH', '')
        self.target_spec = getv(conf, 'TARGET_SPEC', '')
        self.curdir = getv(conf, 'CURDIR')
        self.reldir = getv(conf, 'RELDIR')
        self.dpdeps = getv(conf, 'DPDEPS')
        self.line = 0

        if not self.conf:
            # some of the steps below we want to do only once
            # NOTE: the assignment below creates an instance attribute,
            # the class attribute stays None, so this is once per instance.
            self.conf = conf
            self.host_target = getv(conf, 'HOST_TARGET')
            for srctop in getv(conf, 'SRCTOPS', []):
                if not srctop:
                    continue
                if not srctop.endswith('/'):
                    srctop += '/'
                if srctop not in self.srctops:
                    self.srctops.append(srctop)
                # the tree may be reached via symlinks
                _srctop = os.path.realpath(srctop)
                if not _srctop.endswith('/'):
                    _srctop += '/'
                if _srctop not in self.srctops:
                    self.srctops.append(_srctop)

            trim_list = add_trims(self.machine)
            if self.machine == 'host':
                trim_list += add_trims(self.host_target)
            if self.target_spec:
                trim_list += add_trims(self.target_spec)

            for objroot in getv(conf, 'OBJROOTS', []):
                for e in trim_list:
                    if objroot.endswith(e):
                        # this is not what we want - fix it
                        objroot = objroot[0:-len(e)]

                if not objroot:
                    # nothing left after trimming
                    continue
                if not objroot.endswith('/'):
                    objroot += '/'
                if objroot not in self.objroots:
                    self.objroots.append(objroot)
                    _objroot = os.path.realpath(objroot)
                    if not _objroot.endswith('/'):
                        _objroot += '/'
                    if _objroot not in self.objroots:
                        self.objroots.append(_objroot)

            # we want the longest match
            self.srctops.sort(reverse=True)
            self.objroots.sort(reverse=True)

            self.excludes = getv(conf, 'EXCLUDES', [])

            if self.debug:
                print("host_target=", self.host_target, file=self.debug_out)
                print("srctops=", self.srctops, file=self.debug_out)
                print("objroots=", self.objroots, file=self.debug_out)
                print("excludes=", self.excludes, file=self.debug_out)

            self.dirdep_re = re.compile(r'([^/]+)/(.+)')

        if self.dpdeps and not self.reldir:
            if self.debug:
                print("need reldir:", end=' ', file=self.debug_out)
            if self.curdir:
                srctop = self.find_top(self.curdir, self.srctops)
                if srctop:
                    self.reldir = self.curdir.replace(srctop,'')
                    if self.debug:
                        print(self.reldir, file=self.debug_out)
            if not self.reldir:
                self.dpdeps = None      # we cannot do it?

        self.cwd = os.getcwd()          # make sure this is initialized
        self.last_dir = self.cwd

        if name:
            self.try_parse()

    def reset(self):
        """reset state if we are being passed meta files from multiple directories."""
        self.seen = {}
        self.obj_deps = []
        self.src_deps = []
        self.file_deps = []

    def dirdeps(self, sep='\n'):
        """return DIRDEPS"""
        return sep.strip() + sep.join(self.obj_deps)

    def src_dirdeps(self, sep='\n'):
        """return SRC_DIRDEPS"""
        return sep.strip() + sep.join(self.src_deps)

    def file_depends(self, out=None):
        """Append DPDEPS_${file} += ${RELDIR}
        for each file we saw, to the output file.
        Does nothing if RELDIR is not known."""
        if not self.reldir:
            return None
        for f in sort_unique(self.file_deps):
            print('DPDEPS_%s += %s' % (f, self.reldir), file=out)
        # these entries provide for reverse DIRDEPS lookup
        for f in self.obj_deps:
            print('DEPDIRS_%s += %s' % (f, self.reldir), file=out)

    def seenit(self, dir):
        """remember that we have seen dir."""
        self.seen[dir] = 1

    def add(self, list, data, clue=''):
        """add data to list if it isn't already there."""
        if data not in list:
            list.append(data)
            if self.debug:
                print("%s: %sAdd: %s" % (self.name, clue, data), file=self.debug_out)

    def find_top(self, path, list):
        """the logical tree may be split across multiple trees

        Return the first entry of list that path starts with, or None.
        """
        for top in list:
            if path.startswith(top):
                if self.debug > 2:
                    print("found in", top, file=self.debug_out)
                return top
        return None

    def find_obj(self, objroot, dir, path, input):
        """return path within objroot, taking care of .dirdep files"""
        ddep = None
        for ddepf in [path + '.dirdep', dir + '/.dirdep']:
            if not ddep and os.path.exists(ddepf):
                with open(ddepf, 'r') as df:
                    ddep = df.readline().strip('# \n')
                if self.debug > 1:
                    print("found %s: %s\n" % (ddepf, ddep), file=self.debug_out)
                # strip a trailing ".machine"; an empty machine
                # would match anything and eat the last character.
                if self.machine and ddep.endswith(self.machine):
                    ddep = ddep[0:-(1+len(self.machine))]
                elif self.target_spec and ddep.endswith(self.target_spec):
                    ddep = ddep[0:-(1+len(self.target_spec))]

        if not ddep:
            # no .dirdeps, so remember that we've seen the raw input
            self.seenit(input)
            self.seenit(dir)
            if self.machine == 'none':
                if dir.startswith(objroot):
                    return dir.replace(objroot,'')
                return None
            # expect "machine/rest/of/path" below objroot
            m = self.dirdep_re.match(dir.replace(objroot,''))
            if m:
                ddep = m.group(2)
                dmachine = m.group(1)
                if dmachine != self.machine:
                    if not (self.machine == 'host' and
                            dmachine == self.host_target):
                        if self.debug > 2:
                            print("adding .%s to %s" % (dmachine, ddep), file=self.debug_out)
                        ddep += '.' + dmachine

        return ddep

    def try_parse(self, name=None, file=None):
        """give file and line number causing exception"""
        try:
            self.parse(name, file)
        except:
            # give a useful clue, the exception is always re-raised
            print('{}:{}: '.format(self.name, self.line), end=' ', file=sys.stderr)
            raise

    def parse(self, name=None, file=None):
        """A meta file looks like:

        # Meta data file "path"
        CMD "command-line"
        CWD "cwd"
        TARGET "target"
        -- command output --
        -- filemon acquired metadata --
        # buildmon version 3
        V 3
        C "pid" "cwd"
        E "pid" "path"
        F "pid" "child"
        R "pid" "path"
        W "pid" "path"
        X "pid" "status"
        D "pid" "path"
        L "pid" "src" "target"
        M "pid" "old" "new"
        S "pid" "path"
        # Bye bye

        We go to some effort to avoid processing a dependency more than once.
        Of the above record types only C,E,F,L,R,V and W are of interest.

        name    if set, replaces self.name.
        file    if set, an iterable of lines to parse instead of
                opening self.name; it is not closed.

        Raises AssertionError if no 'V' record was seen.
        """

        version = 0                     # unknown
        if name:
            self.name = name
        if file:
            f = file
            cwd = self.last_dir = self.cwd
        else:
            f = open(self.name, 'r')
            # normally replaced by the CWD record or on first pid,
            # but make sure it is bound.
            cwd = self.cwd
        try:
            skip = True
            pid_cwd = {}
            pid_last_dir = {}
            last_pid = 0

            self.line = 0
            if self.curdir:
                self.seenit(self.curdir)    # we ignore this

            interesting = 'CEFLRV'
            for line in f:
                self.line += 1
                # ignore anything we don't care about
                if not line[0] in interesting:
                    continue
                if self.debug > 2:
                    print("input:", line, end=' ', file=self.debug_out)
                w = line.split()

                if skip:
                    # until we see 'V' only CWD is of interest
                    if w[0] == 'V':
                        skip = False
                        version = int(w[1])
                        # disabled:
                        # if version < 4:
                        #     # we cannot ignore 'W' records
                        #     # as they may be 'rw'
                        #     interesting += 'W'
                    elif w[0] == 'CWD':
                        self.cwd = cwd = self.last_dir = w[1]
                        self.seenit(cwd)    # ignore this
                        if self.debug:
                            print("%s: CWD=%s" % (self.name, cwd), file=self.debug_out)
                    continue

                pid = int(w[1])
                if pid != last_pid:
                    # switch to the state saved for this process
                    if last_pid:
                        pid_last_dir[last_pid] = self.last_dir
                    cwd = getv(pid_cwd, pid, self.cwd)
                    self.last_dir = getv(pid_last_dir, pid, self.cwd)
                    last_pid = pid

                # process operations
                if w[0] == 'F':
                    # child inherits cwd of parent
                    npid = int(w[2])
                    pid_cwd[npid] = cwd
                    pid_last_dir[npid] = cwd
                    last_pid = npid
                    continue
                elif w[0] == 'C':
                    cwd = abspath(w[2], cwd, None, self.debug, self.debug_out)
                    if not cwd:
                        cwd = w[2]
                        if self.debug > 1:
                            print("missing cwd=", cwd, file=self.debug_out)
                    if cwd.endswith('/.'):
                        cwd = cwd[0:-2]
                    self.last_dir = pid_last_dir[pid] = cwd
                    pid_cwd[pid] = cwd
                    if self.debug > 1:
                        print("cwd=", cwd, file=self.debug_out)
                    continue

                if w[2] in self.seen:
                    if self.debug > 2:
                        print("seen:", w[2], file=self.debug_out)
                    continue
                # file operations
                if w[0] in 'ML':
                    # these are special, treat src as read and
                    # target as write
                    self.parse_path(w[2].strip("'"), cwd, 'R', w)
                    self.parse_path(w[3].strip("'"), cwd, 'W', w)
                    continue
                elif w[0] in 'ERWS':
                    path = w[2]
                    if path == '.':
                        continue
                    self.parse_path(path, cwd, w[0], w)

            if version <= 0:
                # an explicit raise survives python -O
                raise AssertionError('missing filemon data')
        finally:
            if not file:
                f.close()

    def is_src(self, base, dir, rdir):
        """is base in srctop"""
        for dir in [dir,rdir]:
            if not dir:
                continue
            path = '/'.join([dir,base])
            srctop = self.find_top(path, self.srctops)
            if srctop:
                if self.dpdeps:
                    self.add(self.file_deps, path.replace(srctop,''), 'file')
                self.add(self.src_deps, dir.replace(srctop,''), 'src')
                self.seenit(dir)
                return True
        return False

    def parse_path(self, path, cwd, op=None, w=None):
        """look at a path for the op specified

        path    the path from the record.
        cwd     current directory of the process concerned.
        op      the operation, defaults to w[0].
        w       the split record; w[2] is remembered as the raw input,
                if w is too short path is used instead.
        """

        if w is None:
            w = []
        if not op:
            op = w[0]
        if len(w) > 2:
            raw = w[2]
        else:
            raw = path

        # we are never interested in .dirdep files as dependencies
        if path.endswith('.dirdep'):
            return
        for p in self.excludes:
            if p and path.startswith(p):
                if self.debug > 2:
                    print("exclude:", p, path, file=self.debug_out)
                return
        # we don't want to resolve the last component if it is
        # a symlink
        path = resolve(path, cwd, self.last_dir, self.debug, self.debug_out)
        if not path:
            return
        dir,base = os.path.split(path)
        if dir in self.seen:
            if self.debug > 2:
                print("seen:", dir, file=self.debug_out)
            return
        # we can have a path in an objdir which is a link
        # to the src dir, we may need to add dependencies for each
        dir = abspath(dir, cwd, self.last_dir, self.debug, self.debug_out)
        if not dir:
            # directory does not exist, nothing to record
            return
        rdir = os.path.realpath(dir)
        if rdir == dir:
            rdir = None
        # now put path back together
        path = '/'.join([dir,base])
        if self.debug > 1:
            print("raw=%s rdir=%s dir=%s path=%s" % (raw, rdir, dir, path), file=self.debug_out)
        if op in 'RWS':
            if path in [self.last_dir, cwd, self.cwd, self.curdir]:
                if self.debug > 1:
                    print("skipping:", path, file=self.debug_out)
                return
            if os.path.isdir(path):
                if op in 'RW':
                    # a clue for resolving later relative paths
                    self.last_dir = path
                if self.debug > 1:
                    print("ldir=", self.last_dir, file=self.debug_out)
                return

        if op in 'ER':
            # finally, we get down to it
            if dir == self.cwd or dir == self.curdir:
                return
            if self.is_src(base, dir, rdir):
                self.seenit(raw)
                if not rdir:
                    return

            objroot = None
            for dir in [dir,rdir]:
                if not dir:
                    continue
                objroot = self.find_top(dir, self.objroots)
                if objroot:
                    break
            if objroot:
                ddep = self.find_obj(objroot, dir, path, raw)
                if ddep:
                    self.add(self.obj_deps, ddep, 'obj')
                    if self.dpdeps and objroot.endswith('/stage/'):
                        sp = '/'.join(path.replace(objroot,'').split('/')[1:])
                        self.add(self.file_deps, sp, 'file')
            else:
                # don't waste time looking again
                self.seenit(raw)
                self.seenit(dir)


def main(argv, klass=MetaFile, xopts='', xoptf=None):
    """Simple driver for class MetaFile.

    Usage:
        script [options] [key=value ...] "meta" ...

    Options and key=value pairs contribute to the
    dictionary passed to MetaFile.

    -S "SRCTOP"
                add "SRCTOP" to the "SRCTOPS" list.

    -C "CURDIR"

    -O "OBJROOT"
                add "OBJROOT" to the "OBJROOTS" list.

    -R "RELDIR"

    -m "MACHINE"

    -a "MACHINE_ARCH"

    -H "HOST_TARGET"

    -T "TARGET_SPEC"

    -X "EXCLUDE"
                add "EXCLUDE" to the "EXCLUDES" list.

    -D "DPDEPS"

    -d  bumps debug level

    -q  suppress output

    xopts is appended to the getopt option string, options not
    listed above are passed to xoptf(option, value, conf) if set.

    Returns the last klass instance created, or None.
    """
    import getopt

    # import Psyco if we can
    # it can speed things up quite a bit
    have_psyco = 0
    try:
        import psyco
        psyco.full()
        have_psyco = 1
    except Exception:
        pass

    conf = {
        'SRCTOPS': [],
        'OBJROOTS': [],
        'EXCLUDES': [],
        }

    # pick up defaults from the environment, each variable
    # is optional and independent of the others.
    for var in ['MACHINE', 'MACHINE_ARCH']:
        val = os.environ.get(var)
        if val:
            conf[var] = val
    for var, k in [('SB_SRC', 'SRCTOPS'), ('SB_OBJROOT', 'OBJROOTS')]:
        val = os.environ.get(var)
        if val:
            conf[k].append(val)

    debug = 0
    output = True

    opts, args = getopt.getopt(argv[1:], 'a:dS:C:O:R:m:D:H:qT:X:' + xopts)
    for o, a in opts:
        if o == '-a':
            conf['MACHINE_ARCH'] = a
        elif o == '-d':
            debug += 1
        elif o == '-q':
            output = False
        elif o == '-H':
            conf['HOST_TARGET'] = a
        elif o == '-S':
            if a not in conf['SRCTOPS']:
                conf['SRCTOPS'].append(a)
        elif o == '-C':
            conf['CURDIR'] = a
        elif o == '-O':
            if a not in conf['OBJROOTS']:
                conf['OBJROOTS'].append(a)
        elif o == '-R':
            conf['RELDIR'] = a
        elif o == '-D':
            conf['DPDEPS'] = a
        elif o == '-m':
            conf['MACHINE'] = a
        elif o == '-T':
            conf['TARGET_SPEC'] = a
        elif o == '-X':
            if a not in conf['EXCLUDES']:
                conf['EXCLUDES'].append(a)
        elif xoptf:
            xoptf(o, a, conf)

    conf['debug'] = debug

    # get any var=val assignments
    # they must precede the meta files
    eaten = 0
    for a in args:
        if a.find('=') <= 0:
            break
        # the value may itself contain '='
        k,v = a.split('=', 1)
        if k in ['SRCTOP','OBJROOT','SRCTOPS','OBJROOTS']:
            if k == 'SRCTOP':
                k = 'SRCTOPS'
            elif k == 'OBJROOT':
                k = 'OBJROOTS'
            if v not in conf[k]:
                conf[k].append(v)
        else:
            conf[k] = v
        eaten += 1

    args = args[eaten:]

    debug_out = getv(conf, 'debug_out', sys.stderr)

    if debug:
        print("config:", file=debug_out)
        print("psyco=", have_psyco, file=debug_out)
        for k,v in list(conf.items()):
            print("%s=%s" % (k,v), file=debug_out)

    m = None
    for a in args:
        if a.endswith('.meta'):
            if not os.path.exists(a):
                continue
            m = klass(a, conf)
        elif a.startswith('@'):
            # there can actually multiple files per line
            with open(a[1:]) as lf:
                for line in lf:
                    for f in line.strip().split():
                        if not os.path.exists(f):
                            continue
                        m = klass(f, conf)

    if output and m:
        print(m.dirdeps())

        print(m.src_dirdeps('\nsrc:'))

        dpdeps = getv(conf, 'DPDEPS')
        if dpdeps:
            # text mode: print() cannot write str to a binary
            # file on Python 3.  The file is truncated, as before.
            with open(dpdeps, 'w') as df:
                m.file_depends(df)

    return m

if __name__ == '__main__':
    try:
        main(sys.argv)
    except:
        # yes, this goes to stdout
        print("ERROR: ", sys.exc_info()[1])
        raise