#!/usr/bin/env python

"""
This script parses each "meta" file and extracts the
information needed to deduce build and src dependencies.

It works much the same as the original shell script, but is
*much* more efficient.

The parsing work is handled by the class MetaFile.
We only pay attention to a subset of the information in the
"meta" files.  Specifically:

'CWD'   to initialize our notion.

'C'     to track chdir(2) on a per process basis

'R'     files read are what we really care about.
        directories read, provide a clue to resolving
        subsequent relative paths.  That is if we cannot find
        them relative to 'cwd', we check relative to the last
        dir read.

'W'     files opened for write or read-write,
        for filemon V3 and earlier.

'E'     files executed.

'L'     files linked

'V'     the filemon version, this record is used as a clue
        that we have reached the interesting bit.

"""

from __future__ import print_function

"""
RCSid:
        $Id: meta2deps.py,v 1.34 2020/10/02 03:11:17 sjg Exp $

        Copyright (c) 2011-2020, Simon J. Gerraty
        Copyright (c) 2011-2017, Juniper Networks, Inc.
        All rights reserved.

        Redistribution and use in source and binary forms, with or without
        modification, are permitted provided that the following conditions
        are met:
        1. Redistributions of source code must retain the above copyright
           notice, this list of conditions and the following disclaimer.
        2. Redistributions in binary form must reproduce the above copyright
           notice, this list of conditions and the following disclaimer in the
           documentation and/or other materials provided with the distribution.

        THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
        "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
        LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
        A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
        OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
        SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
        LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
        DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
        THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
        (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
        OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""

import functools
import os
import re
import sys


def getv(dict, key, d=None):
    """Lookup key in dict and return value or the supplied default."""
    if key in dict:
        return dict[key]
    return d


def resolve(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Return an absolute path, resolving via cwd or last_dir if needed.

    A trailing '/.' is dropped first.  An absolute path is returned
    only if it exists.  '.' and './x' are resolved against cwd without
    an existence check.  Any other relative path is tried against
    last_dir and then cwd, and the first candidate that exists wins;
    '..' yields the parent of the first usable directory.

    Returns None when nothing suitable is found.
    """
    if path.endswith('/.'):
        path = path[0:-2]
    if len(path) > 0 and path[0] == '/':
        if os.path.exists(path):
            return path
        if debug > 2:
            print("skipping non-existent:", path, file=debug_out)
        return None
    if path == '.':
        return cwd
    if path.startswith('./'):
        return cwd + path[1:]
    if last_dir == cwd:
        # no point checking the same directory twice
        last_dir = None
    for d in [last_dir, cwd]:
        if not d:
            continue
        if path == '..':
            dw = d.split('/')
            p = '/'.join(dw[:-1])
            if not p:
                p = '/'
            return p
        p = '/'.join([d, path])
        if debug > 2:
            print("looking for:", p, end=' ', file=debug_out)
        if not os.path.exists(p):
            if debug > 2:
                print("nope", file=debug_out)
            p = None
            continue
        if debug > 2:
            print("found:", p, file=debug_out)
        return p
    return None


def cleanpath(path):
    """cleanup path without using realpath(3)

    Empty and '.' components are dropped and each '..' removes the
    preceding component.  Processing stops at a '..' that has nothing
    left to remove.  A leading '/' is preserved.
    """
    if path.startswith('/'):
        r = '/'
    else:
        r = ''
    p = []
    for d in path.split('/'):
        if not d or d == '.':
            continue
        if d == '..':
            if not p:
                # nothing left to pop: give up on the rest of the path
                break
            p.pop()
            continue
        p.append(d)

    return r + '/'.join(p)


def abspath(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Return an absolute path, resolving via cwd or last_dir if needed.
    this gets called a lot, so we try to avoid calling realpath.

    Returns None for an absolute path that resolve() rejects.  A
    relative path that cannot be resolved is returned as is (after
    cleanpath() when it looks like it needs it).
    """
    rpath = resolve(path, cwd, last_dir, debug, debug_out)
    if rpath:
        path = rpath
    elif len(path) > 0 and path[0] == '/':
        return None
    if (path.find('/') < 0 or
        path.find('./') > 0 or
        path.endswith('/..')):
        path = cleanpath(path)
    return path


def sort_unique(list, cmp=None, key=None, reverse=False):
    """Sort list in place and return a new list without duplicates.

    cmp is an optional old-style comparison function, key and reverse
    are as for list.sort().  Only adjacent equal entries (after the
    sort) are dropped.

    list.sort() takes no cmp argument and only keyword arguments in
    Python 3, so cmp is converted with functools.cmp_to_key().
    """
    if cmp is not None:
        cmp_key = functools.cmp_to_key(cmp)
        if key is None:
            key = cmp_key
        else:
            # as with Python 2, cmp gets applied to the key values
            user_key = key
            key = lambda e: cmp_key(user_key(e))
    list.sort(key=key, reverse=reverse)
    nl = []
    le = None
    for e in list:
        if e == le:
            continue
        le = e
        nl.append(e)
    return nl


def add_trims(x):
    """Return the variants of x, with and without surrounding '/'."""
    return ['/' + x + '/',
            '/' + x,
            x + '/',
            x]


class MetaFile:
    """class to parse meta files generated by bmake.

    Note that srctops, objroots, seen, obj_deps, src_deps and
    file_deps are class attributes which the methods below update in
    place, so (until reset() is called on an instance) the results
    accumulate across all the instances created.  main() relies on
    this: it creates one instance per meta file and reports from the
    last one.
    """

    conf = None
    dirdep_re = None
    host_target = None
    srctops = []
    objroots = []
    excludes = []
    seen = {}
    obj_deps = []
    src_deps = []
    file_deps = []

    def __init__(self, name, conf=None):
        """if name is set we will parse it now.
        conf can have the following keys:

        SRCTOPS list of tops of the src tree(s).

        CURDIR  the src directory 'bmake' was run from.

        RELDIR  the relative path from SRCTOP to CURDIR

        MACHINE the machine we built for.
                set to 'none' if we are not cross-building.
                More specifically if machine cannot be deduced from objdirs.

        MACHINE_ARCH
                saved as self.machine_arch.

        TARGET_SPEC
                Sometimes MACHINE isn't enough.

        HOST_TARGET
                when we build for the pseudo machine 'host'
                the object tree uses HOST_TARGET rather than MACHINE.

        OBJROOTS a list of the common prefix for all obj dirs it might
                end in '/' or '-'.

        DPDEPS  names an optional file to which per file dependencies
                will be appended.
                For example if 'some/path/foo.h' is read from SRCTOP
                then 'DPDEPS_some/path/foo.h +=' "RELDIR" is output.
                This can allow 'bmake' to learn all the dirs within
                the tree that depend on 'foo.h'

        EXCLUDES
                A list of paths to ignore.
                ccache(1) can otherwise be trouble.

        debug   desired debug level

        debug_out open file to send debug output to (sys.stderr)

        """

        if conf is None:
            conf = {}

        self.name = name
        self.debug = getv(conf, 'debug', 0)
        self.debug_out = getv(conf, 'debug_out', sys.stderr)

        self.machine = getv(conf, 'MACHINE', '')
        self.machine_arch = getv(conf, 'MACHINE_ARCH', '')
        self.target_spec = getv(conf, 'TARGET_SPEC', '')
        self.curdir = getv(conf, 'CURDIR')
        self.reldir = getv(conf, 'RELDIR')
        self.dpdeps = getv(conf, 'DPDEPS')
        self.line = 0

        if not self.conf:
            # some of the steps below we want to do only once
            self.conf = conf
            self.host_target = getv(conf, 'HOST_TARGET')
            for srctop in getv(conf, 'SRCTOPS', []):
                if not srctop:
                    continue            # nothing useful to record
                if not srctop.endswith('/'):
                    srctop += '/'
                if srctop not in self.srctops:
                    self.srctops.append(srctop)
                    # also record the real path in case of symlinks
                    _srctop = os.path.realpath(srctop)
                    if not _srctop.endswith('/'):
                        _srctop += '/'
                    if _srctop not in self.srctops:
                        self.srctops.append(_srctop)

            trim_list = add_trims(self.machine)
            if self.machine == 'host' and self.host_target:
                trim_list += add_trims(self.host_target)
            if self.target_spec:
                trim_list += add_trims(self.target_spec)

            for objroot in getv(conf, 'OBJROOTS', []):
                for e in trim_list:
                    # an empty trim (MACHINE not set) matches anything
                    # and objroot[0:-0] would leave us with nothing
                    if e and objroot.endswith(e):
                        # this is not what we want - fix it
                        objroot = objroot[0:-len(e)]

                if not objroot:
                    continue            # nothing left to record
                if not objroot.endswith('/'):
                    objroot += '/'
                if objroot not in self.objroots:
                    self.objroots.append(objroot)
                    _objroot = os.path.realpath(objroot)
                    if not _objroot.endswith('/'):
                        _objroot += '/'
                    if _objroot not in self.objroots:
                        self.objroots.append(_objroot)

            # we want the longest match
            self.srctops.sort(reverse=True)
            self.objroots.sort(reverse=True)

            self.excludes = getv(conf, 'EXCLUDES', [])

            if self.debug:
                print("host_target=", self.host_target, file=self.debug_out)
                print("srctops=", self.srctops, file=self.debug_out)
                print("objroots=", self.objroots, file=self.debug_out)
                print("excludes=", self.excludes, file=self.debug_out)

            self.dirdep_re = re.compile(r'([^/]+)/(.+)')

        if self.dpdeps and not self.reldir:
            if self.debug:
                print("need reldir:", end=' ', file=self.debug_out)
            if self.curdir:
                srctop = self.find_top(self.curdir, self.srctops)
                if srctop:
                    self.reldir = self.curdir.replace(srctop, '')
                    if self.debug:
                        print(self.reldir, file=self.debug_out)
            if not self.reldir:
                self.dpdeps = None      # we cannot do it?

        self.cwd = os.getcwd()          # make sure this is initialized
        self.last_dir = self.cwd

        if name:
            self.try_parse()

    def reset(self):
        """reset state if we are being passed meta files from multiple directories.

        This gives the instance its own empty seen, obj_deps, src_deps
        and file_deps; the class level ones are left alone.
        """
        self.seen = {}
        self.obj_deps = []
        self.src_deps = []
        self.file_deps = []

    def dirdeps(self, sep='\n'):
        """return DIRDEPS

        The obj_deps joined with sep, prefixed with sep stripped of
        white-space.
        """
        return sep.strip() + sep.join(self.obj_deps)

    def src_dirdeps(self, sep='\n'):
        """return SRC_DIRDEPS

        The src_deps joined with sep, prefixed with sep stripped of
        white-space.
        """
        return sep.strip() + sep.join(self.src_deps)

    def file_depends(self, out=None):
        """Append DPDEPS_${file} += ${RELDIR}
        for each file we saw, to the output file.

        out is an open text file, None means sys.stdout.
        Nothing is output if reldir is not known.
        """
        if not self.reldir:
            return None
        for f in sort_unique(self.file_deps):
            print('DPDEPS_%s += %s' % (f, self.reldir), file=out)
        # these entries provide for reverse DIRDEPS lookup
        for f in self.obj_deps:
            print('DEPDIRS_%s += %s' % (f, self.reldir), file=out)

    def seenit(self, dir):
        """remember that we have seen dir."""
        self.seen[dir] = 1

    def add(self, list, data, clue=''):
        """add data to list if it isn't already there.

        clue is only used to label the debug output.
        """
        if data not in list:
            list.append(data)
            if self.debug:
                print("%s: %sAdd: %s" % (self.name, clue, data), file=self.debug_out)

    def find_top(self, path, list):
        """the logical tree may be split across multiple trees

        Return the first entry of list that path starts with, or None.
        """
        for top in list:
            if path.startswith(top):
                if self.debug > 2:
                    print("found in", top, file=self.debug_out)
                return top
        return None

    def find_obj(self, objroot, dir, path, input):
        """return path within objroot, taking care of .dirdep files

        objroot is the entry of objroots that dir matched, path is the
        file found in dir and input the path as it appeared in the
        meta file.  Returns None if no dependency can be deduced.
        """
        ddep = None
        for ddepf in [path + '.dirdep', dir + '/.dirdep']:
            if not ddep and os.path.exists(ddepf):
                with open(ddepf, 'r') as df:
                    ddep = df.readline().strip('# \n')
                if self.debug > 1:
                    print("found %s: %s\n" % (ddepf, ddep), file=self.debug_out)
                # drop the machine (or target_spec) and the character
                # before it; an empty machine must not match
                if self.machine and ddep.endswith(self.machine):
                    ddep = ddep[0:-(1+len(self.machine))]
                elif self.target_spec and ddep.endswith(self.target_spec):
                    ddep = ddep[0:-(1+len(self.target_spec))]

        if not ddep:
            # no .dirdeps, so remember that we've seen the raw input
            self.seenit(input)
            self.seenit(dir)
            if self.machine == 'none':
                if dir.startswith(objroot):
                    return dir.replace(objroot, '')
                return None
            # the first component below objroot is taken as the machine
            m = self.dirdep_re.match(dir.replace(objroot, ''))
            if m:
                ddep = m.group(2)
                dmachine = m.group(1)
                if dmachine != self.machine:
                    if not (self.machine == 'host' and
                            dmachine == self.host_target):
                        if self.debug > 2:
                            print("adding .%s to %s" % (dmachine, ddep), file=self.debug_out)
                        ddep += '.' + dmachine

        return ddep

    def try_parse(self, name=None, file=None):
        """give file and line number causing exception

        Calls parse(); should that raise, 'name:line: ' is output to
        sys.stderr before the exception is re-raised.
        """
        try:
            self.parse(name, file)
        except:
            # give a useful clue
            print('{}:{}: '.format(self.name, self.line), end=' ', file=sys.stderr)
            raise

    def parse(self, name=None, file=None):
        """A meta file looks like:

        # Meta data file "path"
        CMD "command-line"
        CWD "cwd"
        TARGET "target"
        -- command output --
        -- filemon acquired metadata --
        # buildmon version 3
        V 3
        C "pid" "cwd"
        E "pid" "path"
        F "pid" "child"
        R "pid" "path"
        W "pid" "path"
        X "pid" "status"
        D "pid" "path"
        L "pid" "src" "target"
        M "pid" "old" "new"
        S "pid" "path"
        # Bye bye

        We go to some effort to avoid processing a dependency more than once.
        Of the above record types only C,E,F,L,R,V and W are of interest.

        name, if set, replaces self.name.  file, if set, is an open
        file (or other iterable of lines) to read rather than opening
        self.name; it is not closed here.
        """

        version = 0                     # unknown
        if name:
            self.name = name
        if file:
            f = file
            cwd = self.last_dir = self.cwd
        else:
            f = open(self.name, 'r')
            cwd = self.cwd              # until a record tells us better
        skip = True
        pid_cwd = {}
        pid_last_dir = {}
        last_pid = 0

        self.line = 0
        if self.curdir:
            self.seenit(self.curdir)    # we ignore this

        interesting = 'CEFLRV'
        try:
            for line in f:
                self.line += 1
                # ignore anything we don't care about
                if not line[0] in interesting:
                    continue
                if self.debug > 2:
                    print("input:", line, end=' ', file=self.debug_out)
                w = line.split()

                if skip:
                    if w[0] == 'V':
                        skip = False
                        version = int(w[1])
                        # NOTE: 'W' is not added to interesting here;
                        # the code to do so for version < 4 (where 'W'
                        # records may be 'rw') is disabled.
                    elif w[0] == 'CWD':
                        self.cwd = cwd = self.last_dir = w[1]
                        self.seenit(cwd)    # ignore this
                        if self.debug:
                            print("%s: CWD=%s" % (self.name, cwd), file=self.debug_out)
                    continue

                pid = int(w[1])
                if pid != last_pid:
                    # switch to the state saved for this process
                    if last_pid:
                        pid_last_dir[last_pid] = self.last_dir
                    cwd = getv(pid_cwd, pid, self.cwd)
                    self.last_dir = getv(pid_last_dir, pid, self.cwd)
                    last_pid = pid

                # process operations
                if w[0] == 'F':
                    # the child starts out with the cwd of its parent
                    npid = int(w[2])
                    pid_cwd[npid] = cwd
                    pid_last_dir[npid] = cwd
                    last_pid = npid
                    continue
                elif w[0] == 'C':
                    cwd = abspath(w[2], cwd, None, self.debug, self.debug_out)
                    if not cwd:
                        cwd = w[2]
                        if self.debug > 1:
                            print("missing cwd=", cwd, file=self.debug_out)
                    if cwd.endswith('/.'):
                        cwd = cwd[0:-2]
                    self.last_dir = pid_last_dir[pid] = cwd
                    pid_cwd[pid] = cwd
                    if self.debug > 1:
                        print("cwd=", cwd, file=self.debug_out)
                    continue

                if w[2] in self.seen:
                    if self.debug > 2:
                        print("seen:", w[2], file=self.debug_out)
                    continue
                # file operations
                if w[0] in 'ML':
                    # these are special, treat src as read and
                    # target as write
                    self.parse_path(w[2].strip("'"), cwd, 'R', w)
                    self.parse_path(w[3].strip("'"), cwd, 'W', w)
                    continue
                elif w[0] in 'ERWS':
                    path = w[2]
                    if path == '.':
                        continue
                    self.parse_path(path, cwd, w[0], w)
        finally:
            # we only close what we opened, even if parsing failed
            if not file:
                f.close()

        # version is only set by a 'V' record
        assert version > 0, "no 'V' record found"

    def is_src(self, base, dir, rdir):
        """is base in srctop

        dir and rdir (which may be None) are both tried as the
        directory of base.  On the first match the dependency is
        recorded and True is returned.
        """
        for dir in [dir, rdir]:
            if not dir:
                continue
            path = '/'.join([dir, base])
            srctop = self.find_top(path, self.srctops)
            if srctop:
                if self.dpdeps:
                    self.add(self.file_deps, path.replace(srctop, ''), 'file')
                self.add(self.src_deps, dir.replace(srctop, ''), 'src')
                self.seenit(dir)
                return True
        return False

    def parse_path(self, path, cwd, op=None, w=()):
        """look at a path for the op specified

        w is the list of words of the record being processed,
        w[0] provides op if that is not set and w[2] is the raw path
        that gets marked as seen.
        """

        if not op:
            op = w[0]

        # we are never interested in .dirdep files as dependencies
        if path.endswith('.dirdep'):
            return
        for p in self.excludes:
            if p and path.startswith(p):
                if self.debug > 2:
                    print("exclude:", p, path, file=self.debug_out)
                return
        # we don't want to resolve the last component if it is
        # a symlink
        path = resolve(path, cwd, self.last_dir, self.debug, self.debug_out)
        if not path:
            return
        dir, base = os.path.split(path)
        if dir in self.seen:
            if self.debug > 2:
                print("seen:", dir, file=self.debug_out)
            return
        # we can have a path in an objdir which is a link
        # to the src dir, we may need to add dependencies for each
        dir = abspath(dir, cwd, self.last_dir, self.debug, self.debug_out)
        if not dir:
            # abspath() found nothing (the directory has gone?)
            # so there is nothing we can record
            return
        rdir = os.path.realpath(dir)
        if rdir == dir:
            rdir = None
        # now put path back together
        path = '/'.join([dir, base])
        if self.debug > 1:
            print("raw=%s rdir=%s dir=%s path=%s" % (w[2], rdir, dir, path), file=self.debug_out)
        if op in 'RWS':
            if path in [self.last_dir, cwd, self.cwd, self.curdir]:
                if self.debug > 1:
                    print("skipping:", path, file=self.debug_out)
                return
            if os.path.isdir(path):
                if op in 'RW':
                    self.last_dir = path
                if self.debug > 1:
                    print("ldir=", self.last_dir, file=self.debug_out)
                return

        if op in 'ER':
            # finally, we get down to it
            if dir == self.cwd or dir == self.curdir:
                return
            if self.is_src(base, dir, rdir):
                self.seenit(w[2])
                if not rdir:
                    return

            objroot = None
            # note that this loop rebinds dir: below it is the
            # candidate which matched or else the last one listed
            for dir in [dir, rdir]:
                if not dir:
                    continue
                objroot = self.find_top(dir, self.objroots)
                if objroot:
                    break
            if objroot:
                ddep = self.find_obj(objroot, dir, path, w[2])
                if ddep:
                    self.add(self.obj_deps, ddep, 'obj')
                    if self.dpdeps and objroot.endswith('/stage/'):
                        sp = '/'.join(path.replace(objroot, '').split('/')[1:])
                        self.add(self.file_deps, sp, 'file')
            else:
                # don't waste time looking again
                self.seenit(w[2])
                self.seenit(dir)


def main(argv, klass=MetaFile, xopts='', xoptf=None):
    """Simple driver for class MetaFile.

    Usage:
        script [options] [key=value ...] "meta" ...

    Options and key=value pairs contribute to the
    dictionary passed to MetaFile.

    -S "SRCTOP"
                add "SRCTOP" to the "SRCTOPS" list.

    -C "CURDIR"

    -O "OBJROOT"
                add "OBJROOT" to the "OBJROOTS" list.

    -R "RELDIR"

    -m "MACHINE"

    -a "MACHINE_ARCH"

    -H "HOST_TARGET"

    -T "TARGET_SPEC"

    -X "EXCLUDE"
                add "EXCLUDE" to the "EXCLUDES" list.

    -D "DPDEPS"

    -d          bumps debug level

    -q          no output

    xopts is appended to the getopt option string and any option
    not listed above is passed to xoptf(option, value, conf).

    Each argument ending in '.meta' is parsed, an argument of the
    form '@file' names a file listing meta files to parse.
    Files that do not exist are skipped.

    Returns the last instance of klass created (or None).
    """
    import getopt

    # import Psyco if we can
    # it can speed things up quite a bit
    have_psyco = 0
    try:
        import psyco
        psyco.full()
        have_psyco = 1
    except Exception:
        # it is strictly optional
        pass

    conf = {
        'SRCTOPS': [],
        'OBJROOTS': [],
        'EXCLUDES': [],
        }

    # seed conf from the environment;
    # the first variable that is not set ends the sequence
    try:
        machine = os.environ['MACHINE']
        if machine:
            conf['MACHINE'] = machine
        machine_arch = os.environ['MACHINE_ARCH']
        if machine_arch:
            conf['MACHINE_ARCH'] = machine_arch
        srctop = os.environ['SB_SRC']
        if srctop:
            conf['SRCTOPS'].append(srctop)
        objroot = os.environ['SB_OBJROOT']
        if objroot:
            conf['OBJROOTS'].append(objroot)
    except KeyError:
        pass

    debug = 0
    output = True

    opts, args = getopt.getopt(argv[1:], 'a:dS:C:O:R:m:D:H:qT:X:' + xopts)
    for o, a in opts:
        if o == '-a':
            conf['MACHINE_ARCH'] = a
        elif o == '-d':
            debug += 1
        elif o == '-q':
            output = False
        elif o == '-H':
            conf['HOST_TARGET'] = a
        elif o == '-S':
            if a not in conf['SRCTOPS']:
                conf['SRCTOPS'].append(a)
        elif o == '-C':
            conf['CURDIR'] = a
        elif o == '-O':
            if a not in conf['OBJROOTS']:
                conf['OBJROOTS'].append(a)
        elif o == '-R':
            conf['RELDIR'] = a
        elif o == '-D':
            conf['DPDEPS'] = a
        elif o == '-m':
            conf['MACHINE'] = a
        elif o == '-T':
            conf['TARGET_SPEC'] = a
        elif o == '-X':
            if a not in conf['EXCLUDES']:
                conf['EXCLUDES'].append(a)
        elif xoptf:
            xoptf(o, a, conf)

    conf['debug'] = debug

    # get any var=val assignments
    eaten = []
    for a in args:
        if a.find('=') > 0:
            # only split on the first '=', the value may contain more
            k, v = a.split('=', 1)
            if k in ['SRCTOP', 'OBJROOT', 'SRCTOPS', 'OBJROOTS']:
                if k == 'SRCTOP':
                    k = 'SRCTOPS'
                elif k == 'OBJROOT':
                    k = 'OBJROOTS'
                if v not in conf[k]:
                    conf[k].append(v)
            else:
                conf[k] = v
            eaten.append(a)
            continue
        break

    for a in eaten:
        args.remove(a)

    debug_out = getv(conf, 'debug_out', sys.stderr)

    if debug:
        print("config:", file=debug_out)
        print("psyco=", have_psyco, file=debug_out)
        for k, v in list(conf.items()):
            print("%s=%s" % (k, v), file=debug_out)

    m = None
    for a in args:
        if a.endswith('.meta'):
            if not os.path.exists(a):
                continue
            m = klass(a, conf)
        elif a.startswith('@'):
            # there can actually multiple files per line
            with open(a[1:]) as flist:
                for line in flist:
                    for f in line.strip().split():
                        if not os.path.exists(f):
                            continue
                        m = klass(f, conf)

    if output and m:
        print(m.dirdeps())

        print(m.src_dirdeps('\nsrc:'))

        dpdeps = getv(conf, 'DPDEPS')
        if dpdeps:
            # file_depends() uses print() so this must be a text file
            with open(dpdeps, 'w') as out:
                m.file_depends(out)

    return m


if __name__ == '__main__':
    try:
        main(sys.argv)
    except:
        # yes, this goes to stdout
        print("ERROR: ", sys.exc_info()[1])
        raise