1#!/usr/bin/env python 2 3from __future__ import print_function 4 5""" 6This script parses each "meta" file and extracts the 7information needed to deduce build and src dependencies. 8 9It works much the same as the original shell script, but is 10*much* more efficient. 11 12The parsing work is handled by the class MetaFile. 13We only pay attention to a subset of the information in the 14"meta" files. Specifically: 15 16'CWD' to initialize our notion. 17 18'C' to track chdir(2) on a per process basis 19 20'R' files read are what we really care about. 21 directories read, provide a clue to resolving 22 subsequent relative paths. That is if we cannot find 23 them relative to 'cwd', we check relative to the last 24 dir read. 25 26'W' files opened for write or read-write, 27 for filemon V3 and earlier. 28 29'E' files executed. 30 31'L' files linked 32 33'V' the filemon version, this record is used as a clue 34 that we have reached the interesting bit. 35 36""" 37 38""" 39RCSid: 40 $Id: meta2deps.py,v 1.26 2017/05/09 04:04:16 sjg Exp $ 41 42 Copyright (c) 2011-2013, Juniper Networks, Inc. 43 All rights reserved. 44 45 Redistribution and use in source and binary forms, with or without 46 modification, are permitted provided that the following conditions 47 are met: 48 1. Redistributions of source code must retain the above copyright 49 notice, this list of conditions and the following disclaimer. 50 2. Redistributions in binary form must reproduce the above copyright 51 notice, this list of conditions and the following disclaimer in the 52 documentation and/or other materials provided with the distribution. 53 54 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 55 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 56 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 57 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 58 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 59 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 60 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 61 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 62 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 63 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 64 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 65 66""" 67 68import os, re, sys 69 70def getv(dict, key, d=None): 71 """Lookup key in dict and return value or the supplied default.""" 72 if key in dict: 73 return dict[key] 74 return d 75 76def resolve(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr): 77 """ 78 Return an absolute path, resolving via cwd or last_dir if needed. 79 """ 80 if path.endswith('/.'): 81 path = path[0:-2] 82 if len(path) > 0 and path[0] == '/': 83 return path 84 if path == '.': 85 return cwd 86 if path.startswith('./'): 87 return cwd + path[1:] 88 if last_dir == cwd: 89 last_dir = None 90 for d in [last_dir, cwd]: 91 if not d: 92 continue 93 p = '/'.join([d,path]) 94 if debug > 2: 95 print("looking for:", p, end=' ', file=debug_out) 96 if not os.path.exists(p): 97 if debug > 2: 98 print("nope", file=debug_out) 99 p = None 100 continue 101 if debug > 2: 102 print("found:", p, file=debug_out) 103 return p 104 return None 105 106def cleanpath(path): 107 """cleanup path without using realpath(3)""" 108 if path.startswith('/'): 109 r = '/' 110 else: 111 r = '' 112 p = [] 113 w = path.split('/') 114 for d in w: 115 if not d or d == '.': 116 continue 117 if d == '..': 118 p.pop() 119 continue 120 p.append(d) 121 122 return r + '/'.join(p) 123 124def abspath(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr): 125 """ 126 Return an absolute path, resolving via cwd or last_dir if needed. 127 this gets called a lot, so we try to avoid calling realpath. 128 """ 129 rpath = resolve(path, cwd, last_dir, debug, debug_out) 130 if rpath: 131 path = rpath 132 if (path.find('/') < 0 or 133 path.find('./') > 0 or 134 path.endswith('/..')): 135 path = cleanpath(path) 136 return path 137 138def sort_unique(list, cmp=None, key=None, reverse=False): 139 list.sort(cmp, key, reverse) 140 nl = [] 141 le = None 142 for e in list: 143 if e == le: 144 continue 145 le = e 146 nl.append(e) 147 return nl 148 149def add_trims(x): 150 return ['/' + x + '/', 151 '/' + x, 152 x + '/', 153 x] 154 155class MetaFile: 156 """class to parse meta files generated by bmake.""" 157 158 conf = None 159 dirdep_re = None 160 host_target = None 161 srctops = [] 162 objroots = [] 163 excludes = [] 164 seen = {} 165 obj_deps = [] 166 src_deps = [] 167 file_deps = [] 168 169 def __init__(self, name, conf={}): 170 """if name is set we will parse it now. 171 conf can have the follwing keys: 172 173 SRCTOPS list of tops of the src tree(s). 174 175 CURDIR the src directory 'bmake' was run from. 176 177 RELDIR the relative path from SRCTOP to CURDIR 178 179 MACHINE the machine we built for. 180 set to 'none' if we are not cross-building. 181 More specifically if machine cannot be deduced from objdirs. 182 183 TARGET_SPEC 184 Sometimes MACHINE isn't enough. 185 186 HOST_TARGET 187 when we build for the pseudo machine 'host' 188 the object tree uses HOST_TARGET rather than MACHINE. 189 190 OBJROOTS a list of the common prefix for all obj dirs it might 191 end in '/' or '-'. 192 193 DPDEPS names an optional file to which per file dependencies 194 will be appended. 195 For example if 'some/path/foo.h' is read from SRCTOP 196 then 'DPDEPS_some/path/foo.h +=' "RELDIR" is output. 197 This can allow 'bmake' to learn all the dirs within 198 the tree that depend on 'foo.h' 199 200 EXCLUDES 201 A list of paths to ignore. 202 ccache(1) can otherwise be trouble. 203 204 debug desired debug level 205 206 debug_out open file to send debug output to (sys.stderr) 207 208 """ 209 210 self.name = name 211 self.debug = getv(conf, 'debug', 0) 212 self.debug_out = getv(conf, 'debug_out', sys.stderr) 213 214 self.machine = getv(conf, 'MACHINE', '') 215 self.machine_arch = getv(conf, 'MACHINE_ARCH', '') 216 self.target_spec = getv(conf, 'TARGET_SPEC', '') 217 self.curdir = getv(conf, 'CURDIR') 218 self.reldir = getv(conf, 'RELDIR') 219 self.dpdeps = getv(conf, 'DPDEPS') 220 self.line = 0 221 222 if not self.conf: 223 # some of the steps below we want to do only once 224 self.conf = conf 225 self.host_target = getv(conf, 'HOST_TARGET') 226 for srctop in getv(conf, 'SRCTOPS', []): 227 if srctop[-1] != '/': 228 srctop += '/' 229 if not srctop in self.srctops: 230 self.srctops.append(srctop) 231 _srctop = os.path.realpath(srctop) 232 if _srctop[-1] != '/': 233 _srctop += '/' 234 if not _srctop in self.srctops: 235 self.srctops.append(_srctop) 236 237 trim_list = add_trims(self.machine) 238 if self.machine == 'host': 239 trim_list += add_trims(self.host_target) 240 if self.target_spec: 241 trim_list += add_trims(self.target_spec) 242 243 for objroot in getv(conf, 'OBJROOTS', []): 244 for e in trim_list: 245 if objroot.endswith(e): 246 # this is not what we want - fix it 247 objroot = objroot[0:-len(e)] 248 249 if objroot[-1] != '/': 250 objroot += '/' 251 if not objroot in self.objroots: 252 self.objroots.append(objroot) 253 _objroot = os.path.realpath(objroot) 254 if objroot[-1] == '/': 255 _objroot += '/' 256 if not _objroot in self.objroots: 257 self.objroots.append(_objroot) 258 259 # we want the longest match 260 self.srctops.sort(reverse=True) 261 self.objroots.sort(reverse=True) 262 263 self.excludes = getv(conf, 'EXCLUDES', []) 264 265 if self.debug: 266 print("host_target=", self.host_target, file=self.debug_out) 267 print("srctops=", self.srctops, file=self.debug_out) 268 print("objroots=", self.objroots, file=self.debug_out) 269 print("excludes=", self.excludes, file=self.debug_out) 270 271 self.dirdep_re = re.compile(r'([^/]+)/(.+)') 272 273 if self.dpdeps and not self.reldir: 274 if self.debug: 275 print("need reldir:", end=' ', file=self.debug_out) 276 if self.curdir: 277 srctop = self.find_top(self.curdir, self.srctops) 278 if srctop: 279 self.reldir = self.curdir.replace(srctop,'') 280 if self.debug: 281 print(self.reldir, file=self.debug_out) 282 if not self.reldir: 283 self.dpdeps = None # we cannot do it? 284 285 self.cwd = os.getcwd() # make sure this is initialized 286 self.last_dir = self.cwd 287 288 if name: 289 self.try_parse() 290 291 def reset(self): 292 """reset state if we are being passed meta files from multiple directories.""" 293 self.seen = {} 294 self.obj_deps = [] 295 self.src_deps = [] 296 self.file_deps = [] 297 298 def dirdeps(self, sep='\n'): 299 """return DIRDEPS""" 300 return sep.strip() + sep.join(self.obj_deps) 301 302 def src_dirdeps(self, sep='\n'): 303 """return SRC_DIRDEPS""" 304 return sep.strip() + sep.join(self.src_deps) 305 306 def file_depends(self, out=None): 307 """Append DPDEPS_${file} += ${RELDIR} 308 for each file we saw, to the output file.""" 309 if not self.reldir: 310 return None 311 for f in sort_unique(self.file_deps): 312 print('DPDEPS_%s += %s' % (f, self.reldir), file=out) 313 # these entries provide for reverse DIRDEPS lookup 314 for f in self.obj_deps: 315 print('DEPDIRS_%s += %s' % (f, self.reldir), file=out) 316 317 def seenit(self, dir): 318 """rememer that we have seen dir.""" 319 self.seen[dir] = 1 320 321 def add(self, list, data, clue=''): 322 """add data to list if it isn't already there.""" 323 if data not in list: 324 list.append(data) 325 if self.debug: 326 print("%s: %sAdd: %s" % (self.name, clue, data), file=self.debug_out) 327 328 def find_top(self, path, list): 329 """the logical tree may be split across multiple trees""" 330 for top in list: 331 if path.startswith(top): 332 if self.debug > 2: 333 print("found in", top, file=self.debug_out) 334 return top 335 return None 336 337 def find_obj(self, objroot, dir, path, input): 338 """return path within objroot, taking care of .dirdep files""" 339 ddep = None 340 for ddepf in [path + '.dirdep', dir + '/.dirdep']: 341 if not ddep and os.path.exists(ddepf): 342 ddep = open(ddepf, 'r').readline().strip('# \n') 343 if self.debug > 1: 344 print("found %s: %s\n" % (ddepf, ddep), file=self.debug_out) 345 if ddep.endswith(self.machine): 346 ddep = ddep[0:-(1+len(self.machine))] 347 elif self.target_spec and ddep.endswith(self.target_spec): 348 ddep = ddep[0:-(1+len(self.target_spec))] 349 350 if not ddep: 351 # no .dirdeps, so remember that we've seen the raw input 352 self.seenit(input) 353 self.seenit(dir) 354 if self.machine == 'none': 355 if dir.startswith(objroot): 356 return dir.replace(objroot,'') 357 return None 358 m = self.dirdep_re.match(dir.replace(objroot,'')) 359 if m: 360 ddep = m.group(2) 361 dmachine = m.group(1) 362 if dmachine != self.machine: 363 if not (self.machine == 'host' and 364 dmachine == self.host_target): 365 if self.debug > 2: 366 print("adding .%s to %s" % (dmachine, ddep), file=self.debug_out) 367 ddep += '.' + dmachine 368 369 return ddep 370 371 def try_parse(self, name=None, file=None): 372 """give file and line number causing exception""" 373 try: 374 self.parse(name, file) 375 except: 376 # give a useful clue 377 print('{}:{}: '.format(self.name, self.line), end=' ', file=sys.stderr) 378 raise 379 380 def parse(self, name=None, file=None): 381 """A meta file looks like: 382 383 # Meta data file "path" 384 CMD "command-line" 385 CWD "cwd" 386 TARGET "target" 387 -- command output -- 388 -- filemon acquired metadata -- 389 # buildmon version 3 390 V 3 391 C "pid" "cwd" 392 E "pid" "path" 393 F "pid" "child" 394 R "pid" "path" 395 W "pid" "path" 396 X "pid" "status" 397 D "pid" "path" 398 L "pid" "src" "target" 399 M "pid" "old" "new" 400 S "pid" "path" 401 # Bye bye 402 403 We go to some effort to avoid processing a dependency more than once. 404 Of the above record types only C,E,F,L,R,V and W are of interest. 405 """ 406 407 version = 0 # unknown 408 if name: 409 self.name = name; 410 if file: 411 f = file 412 cwd = self.last_dir = self.cwd 413 else: 414 f = open(self.name, 'r') 415 skip = True 416 pid_cwd = {} 417 pid_last_dir = {} 418 last_pid = 0 419 420 self.line = 0 421 if self.curdir: 422 self.seenit(self.curdir) # we ignore this 423 424 interesting = 'CEFLRV' 425 for line in f: 426 self.line += 1 427 # ignore anything we don't care about 428 if not line[0] in interesting: 429 continue 430 if self.debug > 2: 431 print("input:", line, end=' ', file=self.debug_out) 432 w = line.split() 433 434 if skip: 435 if w[0] == 'V': 436 skip = False 437 version = int(w[1]) 438 """ 439 if version < 4: 440 # we cannot ignore 'W' records 441 # as they may be 'rw' 442 interesting += 'W' 443 """ 444 elif w[0] == 'CWD': 445 self.cwd = cwd = self.last_dir = w[1] 446 self.seenit(cwd) # ignore this 447 if self.debug: 448 print("%s: CWD=%s" % (self.name, cwd), file=self.debug_out) 449 continue 450 451 pid = int(w[1]) 452 if pid != last_pid: 453 if last_pid: 454 pid_last_dir[last_pid] = self.last_dir 455 cwd = getv(pid_cwd, pid, self.cwd) 456 self.last_dir = getv(pid_last_dir, pid, self.cwd) 457 last_pid = pid 458 459 # process operations 460 if w[0] == 'F': 461 npid = int(w[2]) 462 pid_cwd[npid] = cwd 463 pid_last_dir[npid] = cwd 464 last_pid = npid 465 continue 466 elif w[0] == 'C': 467 cwd = abspath(w[2], cwd, None, self.debug, self.debug_out) 468 if cwd.endswith('/.'): 469 cwd = cwd[0:-2] 470 self.last_dir = pid_last_dir[pid] = cwd 471 pid_cwd[pid] = cwd 472 if self.debug > 1: 473 print("cwd=", cwd, file=self.debug_out) 474 continue 475 476 if w[2] in self.seen: 477 if self.debug > 2: 478 print("seen:", w[2], file=self.debug_out) 479 continue 480 # file operations 481 if w[0] in 'ML': 482 # these are special, tread src as read and 483 # target as write 484 self.parse_path(w[1].strip("'"), cwd, 'R', w) 485 self.parse_path(w[2].strip("'"), cwd, 'W', w) 486 continue 487 elif w[0] in 'ERWS': 488 path = w[2] 489 self.parse_path(path, cwd, w[0], w) 490 491 if not file: 492 f.close() 493 494 def is_src(self, base, dir, rdir): 495 """is base in srctop""" 496 for dir in [dir,rdir]: 497 if not dir: 498 continue 499 path = '/'.join([dir,base]) 500 srctop = self.find_top(path, self.srctops) 501 if srctop: 502 if self.dpdeps: 503 self.add(self.file_deps, path.replace(srctop,''), 'file') 504 self.add(self.src_deps, dir.replace(srctop,''), 'src') 505 self.seenit(dir) 506 return True 507 return False 508 509 def parse_path(self, path, cwd, op=None, w=[]): 510 """look at a path for the op specified""" 511 512 if not op: 513 op = w[0] 514 515 # we are never interested in .dirdep files as dependencies 516 if path.endswith('.dirdep'): 517 return 518 for p in self.excludes: 519 if p and path.startswith(p): 520 if self.debug > 2: 521 print("exclude:", p, path, file=self.debug_out) 522 return 523 # we don't want to resolve the last component if it is 524 # a symlink 525 path = resolve(path, cwd, self.last_dir, self.debug, self.debug_out) 526 if not path: 527 return 528 dir,base = os.path.split(path) 529 if dir in self.seen: 530 if self.debug > 2: 531 print("seen:", dir, file=self.debug_out) 532 return 533 # we can have a path in an objdir which is a link 534 # to the src dir, we may need to add dependencies for each 535 rdir = dir 536 dir = abspath(dir, cwd, self.last_dir, self.debug, self.debug_out) 537 rdir = os.path.realpath(dir) 538 if rdir == dir: 539 rdir = None 540 # now put path back together 541 path = '/'.join([dir,base]) 542 if self.debug > 1: 543 print("raw=%s rdir=%s dir=%s path=%s" % (w[2], rdir, dir, path), file=self.debug_out) 544 if op in 'RWS': 545 if path in [self.last_dir, cwd, self.cwd, self.curdir]: 546 if self.debug > 1: 547 print("skipping:", path, file=self.debug_out) 548 return 549 if os.path.isdir(path): 550 if op in 'RW': 551 self.last_dir = path; 552 if self.debug > 1: 553 print("ldir=", self.last_dir, file=self.debug_out) 554 return 555 556 if op in 'ERW': 557 # finally, we get down to it 558 if dir == self.cwd or dir == self.curdir: 559 return 560 if self.is_src(base, dir, rdir): 561 self.seenit(w[2]) 562 if not rdir: 563 return 564 565 objroot = None 566 for dir in [dir,rdir]: 567 if not dir: 568 continue 569 objroot = self.find_top(dir, self.objroots) 570 if objroot: 571 break 572 if objroot: 573 ddep = self.find_obj(objroot, dir, path, w[2]) 574 if ddep: 575 self.add(self.obj_deps, ddep, 'obj') 576 if self.dpdeps and objroot.endswith('/stage/'): 577 sp = '/'.join(path.replace(objroot,'').split('/')[1:]) 578 self.add(self.file_deps, sp, 'file') 579 else: 580 # don't waste time looking again 581 self.seenit(w[2]) 582 self.seenit(dir) 583 584 585def main(argv, klass=MetaFile, xopts='', xoptf=None): 586 """Simple driver for class MetaFile. 587 588 Usage: 589 script [options] [key=value ...] "meta" ... 590 591 Options and key=value pairs contribute to the 592 dictionary passed to MetaFile. 593 594 -S "SRCTOP" 595 add "SRCTOP" to the "SRCTOPS" list. 596 597 -C "CURDIR" 598 599 -O "OBJROOT" 600 add "OBJROOT" to the "OBJROOTS" list. 601 602 -m "MACHINE" 603 604 -a "MACHINE_ARCH" 605 606 -H "HOST_TARGET" 607 608 -D "DPDEPS" 609 610 -d bumps debug level 611 612 """ 613 import getopt 614 615 # import Psyco if we can 616 # it can speed things up quite a bit 617 have_psyco = 0 618 try: 619 import psyco 620 psyco.full() 621 have_psyco = 1 622 except: 623 pass 624 625 conf = { 626 'SRCTOPS': [], 627 'OBJROOTS': [], 628 'EXCLUDES': [], 629 } 630 631 try: 632 machine = os.environ['MACHINE'] 633 if machine: 634 conf['MACHINE'] = machine 635 machine_arch = os.environ['MACHINE_ARCH'] 636 if machine_arch: 637 conf['MACHINE_ARCH'] = machine_arch 638 srctop = os.environ['SB_SRC'] 639 if srctop: 640 conf['SRCTOPS'].append(srctop) 641 objroot = os.environ['SB_OBJROOT'] 642 if objroot: 643 conf['OBJROOTS'].append(objroot) 644 except: 645 pass 646 647 debug = 0 648 output = True 649 650 opts, args = getopt.getopt(argv[1:], 'a:dS:C:O:R:m:D:H:qT:X:' + xopts) 651 for o, a in opts: 652 if o == '-a': 653 conf['MACHINE_ARCH'] = a 654 elif o == '-d': 655 debug += 1 656 elif o == '-q': 657 output = False 658 elif o == '-H': 659 conf['HOST_TARGET'] = a 660 elif o == '-S': 661 if a not in conf['SRCTOPS']: 662 conf['SRCTOPS'].append(a) 663 elif o == '-C': 664 conf['CURDIR'] = a 665 elif o == '-O': 666 if a not in conf['OBJROOTS']: 667 conf['OBJROOTS'].append(a) 668 elif o == '-R': 669 conf['RELDIR'] = a 670 elif o == '-D': 671 conf['DPDEPS'] = a 672 elif o == '-m': 673 conf['MACHINE'] = a 674 elif o == '-T': 675 conf['TARGET_SPEC'] = a 676 elif o == '-X': 677 if a not in conf['EXCLUDES']: 678 conf['EXCLUDES'].append(a) 679 elif xoptf: 680 xoptf(o, a, conf) 681 682 conf['debug'] = debug 683 684 # get any var=val assignments 685 eaten = [] 686 for a in args: 687 if a.find('=') > 0: 688 k,v = a.split('=') 689 if k in ['SRCTOP','OBJROOT','SRCTOPS','OBJROOTS']: 690 if k == 'SRCTOP': 691 k = 'SRCTOPS' 692 elif k == 'OBJROOT': 693 k = 'OBJROOTS' 694 if v not in conf[k]: 695 conf[k].append(v) 696 else: 697 conf[k] = v 698 eaten.append(a) 699 continue 700 break 701 702 for a in eaten: 703 args.remove(a) 704 705 debug_out = getv(conf, 'debug_out', sys.stderr) 706 707 if debug: 708 print("config:", file=debug_out) 709 print("psyco=", have_psyco, file=debug_out) 710 for k,v in list(conf.items()): 711 print("%s=%s" % (k,v), file=debug_out) 712 713 m = None 714 for a in args: 715 if a.endswith('.meta'): 716 if not os.path.exists(a): 717 continue 718 m = klass(a, conf) 719 elif a.startswith('@'): 720 # there can actually multiple files per line 721 for line in open(a[1:]): 722 for f in line.strip().split(): 723 if not os.path.exists(f): 724 continue 725 m = klass(f, conf) 726 727 if output and m: 728 print(m.dirdeps()) 729 730 print(m.src_dirdeps('\nsrc:')) 731 732 dpdeps = getv(conf, 'DPDEPS') 733 if dpdeps: 734 m.file_depends(open(dpdeps, 'wb')) 735 736 return m 737 738if __name__ == '__main__': 739 try: 740 main(sys.argv) 741 except: 742 # yes, this goes to stdout 743 print("ERROR: ", sys.exc_info()[1]) 744 raise 745 746