#!/usr/bin/env python
"""
This script parses each "meta" file and extracts the
information needed to deduce build and src dependencies.

It works much the same as the original shell script, but is
*much* more efficient.

The parsing work is handled by the class MetaFile.
We only pay attention to a subset of the information in the
"meta" files.  Specifically:

'CWD'   to initialize our notion.

'C'     to track chdir(2) on a per process basis

'R'     files read are what we really care about.
        directories read, provide a clue to resolving
        subsequent relative paths.  That is if we cannot find
        them relative to 'cwd', we check relative to the last
        dir read.

'W'     files opened for write or read-write,
        for filemon V3 and earlier.

'E'     files executed.

'L'     files linked

'V'     the filemon version, this record is used as a clue
        that we have reached the interesting bit.

"""

from __future__ import print_function

"""
RCSid:
        $Id: meta2deps.py,v 1.33 2020/08/19 17:51:53 sjg Exp $

        Copyright (c) 2011-2020, Simon J. Gerraty
        Copyright (c) 2011-2017, Juniper Networks, Inc.
        All rights reserved.

        Redistribution and use in source and binary forms, with or without
        modification, are permitted provided that the following conditions
        are met:
        1. Redistributions of source code must retain the above copyright
           notice, this list of conditions and the following disclaimer.
        2. Redistributions in binary form must reproduce the above copyright
           notice, this list of conditions and the following disclaimer in the
           documentation and/or other materials provided with the distribution.

        THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
        "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
        LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
        A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
        OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
        SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
        LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
        DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
        THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
        (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
        OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""

import functools
import os
import re
import sys


def getv(dict, key, d=None):
    """Lookup key in dict and return value or the supplied default."""
    if key in dict:
        return dict[key]
    return d


def resolve(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Return an absolute path, resolving via cwd or last_dir if needed.

    Absolute paths are returned only if they exist.  '.' and './x'
    are resolved against cwd without checking for existence.  Any
    other relative path is looked for under last_dir, then cwd, and
    the first existing candidate wins.  Returns None when nothing
    suitable is found.
    """
    if path.endswith('/.'):
        path = path[0:-2]
    if len(path) > 0 and path[0] == '/':
        if os.path.exists(path):
            return path
        if debug > 2:
            print("skipping non-existent:", path, file=debug_out)
        return None
    if path == '.':
        return cwd
    if path.startswith('./'):
        return cwd + path[1:]
    if last_dir == cwd:
        # no point checking the same directory twice
        last_dir = None
    for d in [last_dir, cwd]:
        if not d:
            continue
        if path == '..':
            # parent of the first usable directory, no existence check
            dw = d.split('/')
            p = '/'.join(dw[:-1])
            if not p:
                p = '/'
            return p
        p = '/'.join([d, path])
        if debug > 2:
            print("looking for:", p, end=' ', file=debug_out)
        if not os.path.exists(p):
            if debug > 2:
                print("nope", file=debug_out)
            p = None
            continue
        if debug > 2:
            print("found:", p, file=debug_out)
        return p
    return None


def cleanpath(path):
    """cleanup path without using realpath(3)

    Empty and '.' components are dropped and 'x/..' pairs are
    collapsed.  A '..' with nothing left to remove stops processing:
    the components collected so far (none) are returned and the rest
    of the path is discarded.
    """
    if path.startswith('/'):
        r = '/'
    else:
        r = ''
    p = []
    for d in path.split('/'):
        if not d or d == '.':
            continue
        if d == '..':
            if not p:
                # cannot climb above the start of the path
                break
            p.pop()
            continue
        p.append(d)

    return r + '/'.join(p)


def abspath(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Return an absolute path, resolving via cwd or last_dir if needed.
    this gets called a lot, so we try to avoid calling realpath.

    Returns None for an absolute path that does not exist.  A relative
    path that cannot be resolved is returned (cleaned) as is.
    """
    rpath = resolve(path, cwd, last_dir, debug, debug_out)
    if rpath:
        path = rpath
    elif len(path) > 0 and path[0] == '/':
        return None
    if (path.find('/') < 0 or
            path.find('./') > 0 or
            path.endswith('/..')):
        path = cleanpath(path)
    return path


def sort_unique(list, cmp=None, key=None, reverse=False):
    """Sort list in place and return a new list without duplicates.

    cmp is an optional old-style comparison function; it is adapted
    with functools.cmp_to_key since list.sort() only accepts keyword
    'key' and 'reverse' arguments on Python 3.  If both cmp and key
    are given, cmp is applied to the key values.
    """
    if cmp is not None:
        cmp_key = functools.cmp_to_key(cmp)
        if key is None:
            key = cmp_key
        else:
            user_key = key
            key = lambda e: cmp_key(user_key(e))
    list.sort(key=key, reverse=reverse)
    nl = []
    for e in list:
        # duplicates are adjacent after sorting
        if nl and e == nl[-1]:
            continue
        nl.append(e)
    return nl


def add_trims(x):
    """Return the suffix variants of x which we trim from an objroot."""
    return ['/' + x + '/',
            '/' + x,
            x + '/',
            x]


class MetaFile:
    """class to parse meta files generated by bmake."""

    # NOTE: these are class attributes and the lists/dict are mutated
    # in place, so until reset() is called they are shared by every
    # instance.  main() relies on this: it creates one instance per
    # meta file and reports the accumulated results from the last one.
    conf = None
    dirdep_re = None
    host_target = None
    srctops = []
    objroots = []
    excludes = []
    seen = {}
    obj_deps = []
    src_deps = []
    file_deps = []

    def __init__(self, name, conf=None):
        """if name is set we will parse it now.
        conf can have the following keys:

        SRCTOPS list of tops of the src tree(s).

        CURDIR  the src directory 'bmake' was run from.

        RELDIR  the relative path from SRCTOP to CURDIR

        MACHINE the machine we built for.
                set to 'none' if we are not cross-building.
                More specifically if machine cannot be deduced from objdirs.

        TARGET_SPEC
                Sometimes MACHINE isn't enough.

        HOST_TARGET
                when we build for the pseudo machine 'host'
                the object tree uses HOST_TARGET rather than MACHINE.

        OBJROOTS a list of the common prefix for all obj dirs it might
                end in '/' or '-'.

        DPDEPS  names an optional file to which per file dependencies
                will be appended.
                For example if 'some/path/foo.h' is read from SRCTOP
                then 'DPDEPS_some/path/foo.h +=' "RELDIR" is output.
                This can allow 'bmake' to learn all the dirs within
                the tree that depend on 'foo.h'

        EXCLUDES
                A list of paths to ignore.
                ccache(1) can otherwise be trouble.

        debug   desired debug level

        debug_out open file to send debug output to (sys.stderr)

        """

        if conf is None:
            conf = {}

        self.name = name
        self.debug = getv(conf, 'debug', 0)
        self.debug_out = getv(conf, 'debug_out', sys.stderr)

        self.machine = getv(conf, 'MACHINE', '')
        self.machine_arch = getv(conf, 'MACHINE_ARCH', '')
        self.target_spec = getv(conf, 'TARGET_SPEC', '')
        self.curdir = getv(conf, 'CURDIR')
        self.reldir = getv(conf, 'RELDIR')
        self.dpdeps = getv(conf, 'DPDEPS')
        self.line = 0

        if not self.conf:
            # some of the steps below we want to do only once
            # NOTE(review): this assigns an instance attribute, so the
            # class-level 'conf' stays None and the block runs for each
            # new instance; the membership tests keep it idempotent.
            self.conf = conf
            self.host_target = getv(conf, 'HOST_TARGET')
            for srctop in getv(conf, 'SRCTOPS', []):
                if not srctop:
                    continue
                if srctop[-1] != '/':
                    srctop += '/'
                if srctop not in self.srctops:
                    self.srctops.append(srctop)
                _srctop = os.path.realpath(srctop)
                if _srctop[-1] != '/':
                    _srctop += '/'
                if _srctop not in self.srctops:
                    self.srctops.append(_srctop)

            # Only trim non-empty names: an empty suffix matches every
            # string and objroot[0:-0] would reduce the objroot to ''.
            trim_list = []
            if self.machine:
                trim_list += add_trims(self.machine)
            if self.machine == 'host' and self.host_target:
                trim_list += add_trims(self.host_target)
            if self.target_spec:
                trim_list += add_trims(self.target_spec)

            for objroot in getv(conf, 'OBJROOTS', []):
                for e in trim_list:
                    if objroot.endswith(e):
                        # this is not what we want - fix it
                        objroot = objroot[0:-len(e)]

                if not objroot:
                    # nothing left to match against
                    continue
                if objroot[-1] != '/':
                    objroot += '/'
                if objroot not in self.objroots:
                    self.objroots.append(objroot)
                # realpath drops the trailing '/', put it back
                _objroot = os.path.realpath(objroot)
                if _objroot[-1] != '/':
                    _objroot += '/'
                if _objroot not in self.objroots:
                    self.objroots.append(_objroot)

            # we want the longest match
            self.srctops.sort(reverse=True)
            self.objroots.sort(reverse=True)

            self.excludes = getv(conf, 'EXCLUDES', [])

            if self.debug:
                print("host_target=", self.host_target, file=self.debug_out)
                print("srctops=", self.srctops, file=self.debug_out)
                print("objroots=", self.objroots, file=self.debug_out)
                print("excludes=", self.excludes, file=self.debug_out)

            self.dirdep_re = re.compile(r'([^/]+)/(.+)')

        if self.dpdeps and not self.reldir:
            if self.debug:
                print("need reldir:", end=' ', file=self.debug_out)
            if self.curdir:
                srctop = self.find_top(self.curdir, self.srctops)
                if srctop:
                    self.reldir = self.curdir.replace(srctop, '')
                    if self.debug:
                        print(self.reldir, file=self.debug_out)
            if not self.reldir:
                self.dpdeps = None      # we cannot do it?

        self.cwd = os.getcwd()          # make sure this is initialized
        self.last_dir = self.cwd

        if name:
            self.try_parse()

    def reset(self):
        """reset state if we are being passed meta files from multiple directories."""
        self.seen = {}
        self.obj_deps = []
        self.src_deps = []
        self.file_deps = []

    def dirdeps(self, sep='\n'):
        """return DIRDEPS"""
        return sep.strip() + sep.join(self.obj_deps)

    def src_dirdeps(self, sep='\n'):
        """return SRC_DIRDEPS"""
        return sep.strip() + sep.join(self.src_deps)

    def file_depends(self, out=None):
        """Append DPDEPS_${file} += ${RELDIR}
        for each file we saw, to the output file.

        out is an open text file; None means standard output.
        Nothing is written if RELDIR is not known.
        Note that self.file_deps is sorted in place.
        """
        if not self.reldir:
            return None
        for f in sort_unique(self.file_deps):
            print('DPDEPS_%s += %s' % (f, self.reldir), file=out)
        # these entries provide for reverse DIRDEPS lookup
        for f in self.obj_deps:
            print('DEPDIRS_%s += %s' % (f, self.reldir), file=out)

    def seenit(self, dir):
        """remember that we have seen dir."""
        self.seen[dir] = 1

    def add(self, list, data, clue=''):
        """add data to list if it isn't already there."""
        if data not in list:
            list.append(data)
            if self.debug:
                print("%s: %sAdd: %s" % (self.name, clue, data), file=self.debug_out)

    def find_top(self, path, list):
        """the logical tree may be split across multiple trees

        Return the first entry of list which is a prefix of path,
        or None.
        """
        for top in list:
            if path.startswith(top):
                if self.debug > 2:
                    print("found in", top, file=self.debug_out)
                return top
        return None

    def find_obj(self, objroot, dir, path, input):
        """return path within objroot, taking care of .dirdep files

        The first of 'path.dirdep' and 'dir/.dirdep' which exists and
        yields a non-empty first line supplies the answer; a trailing
        '.MACHINE' (or '.TARGET_SPEC') is removed from it.  Otherwise
        the answer is derived from dir relative to objroot.
        May return None.
        """
        ddep = None
        for ddepf in [path + '.dirdep', dir + '/.dirdep']:
            if not ddep and os.path.exists(ddepf):
                with open(ddepf, 'r') as f:
                    ddep = f.readline().strip('# \n')
                if self.debug > 1:
                    print("found %s: %s\n" % (ddepf, ddep), file=self.debug_out)
                # the '1+' accounts for the separator before the suffix;
                # an empty machine must not match (everything ends with '')
                if self.machine and ddep.endswith(self.machine):
                    ddep = ddep[0:-(1+len(self.machine))]
                elif self.target_spec and ddep.endswith(self.target_spec):
                    ddep = ddep[0:-(1+len(self.target_spec))]

        if not ddep:
            # no .dirdeps, so remember that we've seen the raw input
            self.seenit(input)
            self.seenit(dir)
            if self.machine == 'none':
                if dir.startswith(objroot):
                    return dir.replace(objroot, '')
                return None
            # first component below objroot is taken to be the machine
            m = self.dirdep_re.match(dir.replace(objroot, ''))
            if m:
                ddep = m.group(2)
                dmachine = m.group(1)
                if dmachine != self.machine:
                    if not (self.machine == 'host' and
                            dmachine == self.host_target):
                        if self.debug > 2:
                            print("adding .%s to %s" % (dmachine, ddep), file=self.debug_out)
                        ddep += '.' + dmachine

        return ddep

    def try_parse(self, name=None, file=None):
        """give file and line number causing exception"""
        try:
            self.parse(name, file)
        except:
            # give a useful clue, then let the exception propagate
            print('{}:{}: '.format(self.name, self.line), end=' ', file=sys.stderr)
            raise

    def parse(self, name=None, file=None):
        """A meta file looks like:

        # Meta data file "path"
        CMD "command-line"
        CWD "cwd"
        TARGET "target"
        -- command output --
        -- filemon acquired metadata --
        # buildmon version 3
        V 3
        C "pid" "cwd"
        E "pid" "path"
        F "pid" "child"
        R "pid" "path"
        W "pid" "path"
        X "pid" "status"
        D "pid" "path"
        L "pid" "src" "target"
        M "pid" "old" "new"
        S "pid" "path"
        # Bye bye

        We go to some effort to avoid processing a dependency more than once.
        Of the above record types only C,E,F,L,R,V and W are of interest.

        name, if given, replaces self.name.  file, if given, is an
        iterable of lines to use instead of opening self.name; it is
        not closed here.
        """

        if name:
            self.name = name
        if file:
            self.last_dir = self.cwd
            self._parse_records(file)
        else:
            # 'with' makes sure the file is closed even if a record
            # raises an exception
            with open(self.name, 'r') as f:
                self._parse_records(f)

    def _parse_records(self, f):
        """Process the records of one meta file from the iterable f."""
        version = 0     # unknown
        cwd = self.cwd
        skip = True
        pid_cwd = {}
        pid_last_dir = {}
        last_pid = 0

        self.line = 0
        if self.curdir:
            self.seenit(self.curdir)    # we ignore this

        # 'W' is not listed: support for treating 'W' records of
        # filemon versions before 4 as possible read-write opens is
        # currently disabled.
        interesting = 'CEFLRV'
        for line in f:
            self.line += 1
            # ignore anything we don't care about
            if not line or line[0] not in interesting:
                continue
            if self.debug > 2:
                print("input:", line, end=' ', file=self.debug_out)
            w = line.split()

            if skip:
                # still in the header, wait for the 'V' record
                if w[0] == 'V':
                    skip = False
                    version = int(w[1])     # not used beyond validation
                elif w[0] == 'CWD':
                    self.cwd = cwd = self.last_dir = w[1]
                    self.seenit(cwd)    # ignore this
                    if self.debug:
                        print("%s: CWD=%s" % (self.name, cwd), file=self.debug_out)
                continue

            pid = int(w[1])
            if pid != last_pid:
                # switch to the state we saved for this process
                if last_pid:
                    pid_last_dir[last_pid] = self.last_dir
                cwd = getv(pid_cwd, pid, self.cwd)
                self.last_dir = getv(pid_last_dir, pid, self.cwd)
                last_pid = pid

            # process operations
            if w[0] == 'F':
                # the child starts out in the directory of its parent
                npid = int(w[2])
                pid_cwd[npid] = cwd
                pid_last_dir[npid] = cwd
                last_pid = npid
                continue
            elif w[0] == 'C':
                cwd = abspath(w[2], cwd, None, self.debug, self.debug_out)
                if not cwd:
                    cwd = w[2]
                    if self.debug > 1:
                        print("missing cwd=", cwd, file=self.debug_out)
                if cwd.endswith('/.'):
                    cwd = cwd[0:-2]
                self.last_dir = pid_last_dir[pid] = cwd
                pid_cwd[pid] = cwd
                if self.debug > 1:
                    print("cwd=", cwd, file=self.debug_out)
                continue

            if w[2] in self.seen:
                if self.debug > 2:
                    print("seen:", w[2], file=self.debug_out)
                continue
            # file operations
            if w[0] in 'ML':
                # these are special, treat src as read and
                # target as write
                self.parse_path(w[2].strip("'"), cwd, 'R', w)
                self.parse_path(w[3].strip("'"), cwd, 'W', w)
                continue
            elif w[0] in 'ERWS':
                path = w[2]
                if path == '.':
                    continue
                self.parse_path(path, cwd, w[0], w)

    def is_src(self, base, dir, rdir):
        """is base in srctop

        dir and then rdir (if set) are tried; on the first match the
        src (and optionally file) dependency is recorded and True is
        returned.
        """
        for dir in [dir, rdir]:
            if not dir:
                continue
            path = '/'.join([dir, base])
            srctop = self.find_top(path, self.srctops)
            if srctop:
                if self.dpdeps:
                    self.add(self.file_deps, path.replace(srctop, ''), 'file')
                self.add(self.src_deps, dir.replace(srctop, ''), 'src')
                self.seenit(dir)
                return True
        return False

    def parse_path(self, path, cwd, op=None, w=None):
        """look at a path for the op specified

        w is the split record the path came from; w[0] supplies op if
        op is not given and w[2] is the raw path which gets marked as
        seen.
        """
        if w is None:
            w = []

        if not op:
            op = w[0]

        # we are never interested in .dirdep files as dependencies
        if path.endswith('.dirdep'):
            return
        for p in self.excludes:
            if p and path.startswith(p):
                if self.debug > 2:
                    print("exclude:", p, path, file=self.debug_out)
                return
        # we don't want to resolve the last component if it is
        # a symlink
        path = resolve(path, cwd, self.last_dir, self.debug, self.debug_out)
        if not path:
            return
        dir, base = os.path.split(path)
        if dir in self.seen:
            if self.debug > 2:
                print("seen:", dir, file=self.debug_out)
            return
        # we can have a path in an objdir which is a link
        # to the src dir, we may need to add dependencies for each
        dir = abspath(dir, cwd, self.last_dir, self.debug, self.debug_out)
        if dir is None:
            # resolve() does not check that './x' paths exist, so the
            # directory may turn out to be missing
            return
        rdir = os.path.realpath(dir)
        if rdir == dir:
            rdir = None
        # now put path back together
        path = '/'.join([dir, base])
        if self.debug > 1:
            print("raw=%s rdir=%s dir=%s path=%s" % (w[2], rdir, dir, path), file=self.debug_out)
        if op in 'RWS':
            if path in [self.last_dir, cwd, self.cwd, self.curdir]:
                if self.debug > 1:
                    print("skipping:", path, file=self.debug_out)
                return
            if os.path.isdir(path):
                if op in 'RW':
                    self.last_dir = path
                if self.debug > 1:
                    print("ldir=", self.last_dir, file=self.debug_out)
                return

        if op in 'ER':
            # finally, we get down to it
            if dir == self.cwd or dir == self.curdir:
                return
            if self.is_src(base, dir, rdir):
                self.seenit(w[2])
                if not rdir:
                    return

            objroot = None
            # NOTE: 'dir' is rebound by this loop and is used below; if
            # nothing matches it ends up as the last candidate (rdir,
            # which may be None).
            for dir in [dir, rdir]:
                if not dir:
                    continue
                objroot = self.find_top(dir, self.objroots)
                if objroot:
                    break
            if objroot:
                ddep = self.find_obj(objroot, dir, path, w[2])
                if ddep:
                    self.add(self.obj_deps, ddep, 'obj')
                    if self.dpdeps and objroot.endswith('/stage/'):
                        sp = '/'.join(path.replace(objroot, '').split('/')[1:])
                        self.add(self.file_deps, sp, 'file')
            else:
                # don't waste time looking again
                self.seenit(w[2])
                self.seenit(dir)


def main(argv, klass=MetaFile, xopts='', xoptf=None):
    """Simple driver for class MetaFile.

    Usage:
        script [options] [key=value ...] "meta" ...

    Options and key=value pairs contribute to the
    dictionary passed to MetaFile.

    -S "SRCTOP"
                add "SRCTOP" to the "SRCTOPS" list.

    -C "CURDIR"

    -O "OBJROOT"
                add "OBJROOT" to the "OBJROOTS" list.

    -m "MACHINE"

    -a "MACHINE_ARCH"

    -H "HOST_TARGET"

    -D "DPDEPS"

    -d          bumps debug level

    -R "RELDIR", -T "TARGET_SPEC" and -X "EXCLUDE" (added to the
    "EXCLUDES" list) are accepted as well, -q suppresses the output.

    The environment variables MACHINE, MACHINE_ARCH, SB_SRC and
    SB_OBJROOT provide defaults.  xopts names additional getopt
    options which are passed to xoptf(option, value, conf).

    An argument ending in '.meta' is a meta file to parse, '@file'
    names a file which lists meta files.  Returns the last object
    created (None if there was none).
    """
    import getopt

    # import Psyco if we can
    # it can speed things up quite a bit
    have_psyco = 0
    try:
        import psyco
        psyco.full()
        have_psyco = 1
    except Exception:
        # purely optional
        pass

    conf = {
        'SRCTOPS': [],
        'OBJROOTS': [],
        'EXCLUDES': [],
        }

    # each variable is optional on its own, a missing one must not
    # stop us looking at the others
    for var in ('MACHINE', 'MACHINE_ARCH'):
        value = os.environ.get(var)
        if value:
            conf[var] = value
    for var, key in (('SB_SRC', 'SRCTOPS'), ('SB_OBJROOT', 'OBJROOTS')):
        value = os.environ.get(var)
        if value:
            conf[key].append(value)

    debug = 0
    output = True

    opts, args = getopt.getopt(argv[1:], 'a:dS:C:O:R:m:D:H:qT:X:' + xopts)
    for o, a in opts:
        if o == '-a':
            conf['MACHINE_ARCH'] = a
        elif o == '-d':
            debug += 1
        elif o == '-q':
            output = False
        elif o == '-H':
            conf['HOST_TARGET'] = a
        elif o == '-S':
            if a not in conf['SRCTOPS']:
                conf['SRCTOPS'].append(a)
        elif o == '-C':
            conf['CURDIR'] = a
        elif o == '-O':
            if a not in conf['OBJROOTS']:
                conf['OBJROOTS'].append(a)
        elif o == '-R':
            conf['RELDIR'] = a
        elif o == '-D':
            conf['DPDEPS'] = a
        elif o == '-m':
            conf['MACHINE'] = a
        elif o == '-T':
            conf['TARGET_SPEC'] = a
        elif o == '-X':
            if a not in conf['EXCLUDES']:
                conf['EXCLUDES'].append(a)
        elif xoptf:
            xoptf(o, a, conf)

    conf['debug'] = debug

    # get any var=val assignments, they must precede the meta files
    eaten = 0
    for a in args:
        if a.find('=') <= 0:
            break
        # split only once, the value may itself contain '='
        k, v = a.split('=', 1)
        if k in ['SRCTOP', 'OBJROOT', 'SRCTOPS', 'OBJROOTS']:
            if k == 'SRCTOP':
                k = 'SRCTOPS'
            elif k == 'OBJROOT':
                k = 'OBJROOTS'
            if v not in conf[k]:
                conf[k].append(v)
        else:
            conf[k] = v
        eaten += 1

    del args[:eaten]

    debug_out = getv(conf, 'debug_out', sys.stderr)

    if debug:
        print("config:", file=debug_out)
        print("psyco=", have_psyco, file=debug_out)
        for k, v in conf.items():
            print("%s=%s" % (k, v), file=debug_out)

    m = None
    for a in args:
        if a.endswith('.meta'):
            if not os.path.exists(a):
                continue
            m = klass(a, conf)
        elif a.startswith('@'):
            # there can actually multiple files per line
            with open(a[1:]) as flist:
                for line in flist:
                    for f in line.strip().split():
                        if not os.path.exists(f):
                            continue
                        m = klass(f, conf)

    if output and m:
        print(m.dirdeps())

        print(m.src_dirdeps('\nsrc:'))

        dpdeps = getv(conf, 'DPDEPS')
        if dpdeps:
            # text mode: file_depends() uses print()
            with open(dpdeps, 'w') as out:
                m.file_depends(out)

    return m


if __name__ == '__main__':
    try:
        main(sys.argv)
    except:
        # yes, this goes to stdout
        print("ERROR: ", sys.exc_info()[1])
        raise