#!/usr/bin/env python

from __future__ import print_function

"""
This script parses each "meta" file and extracts the
information needed to deduce build and src dependencies.

It works much the same as the original shell script, but is
*much* more efficient.

The parsing work is handled by the class MetaFile.
We only pay attention to a subset of the information in the
"meta" files.  Specifically:

'CWD'   to initialize our notion.

'C'     to track chdir(2) on a per process basis

'R'     files read are what we really care about.
        directories read, provide a clue to resolving
        subsequent relative paths.  That is if we cannot find
        them relative to 'cwd', we check relative to the last
        dir read.

'W'     files opened for write or read-write,
        for filemon V3 and earlier.

'E'     files executed.

'L'     files linked

'V'     the filemon version, this record is used as a clue
        that we have reached the interesting bit.

"""

"""
RCSid:
        $Id: meta2deps.py,v 1.27 2017/05/24 00:04:04 sjg Exp $

        Copyright (c) 2011-2013, Juniper Networks, Inc.
        All rights reserved.

        Redistribution and use in source and binary forms, with or without
        modification, are permitted provided that the following conditions
        are met:
        1. Redistributions of source code must retain the above copyright
           notice, this list of conditions and the following disclaimer.
        2. Redistributions in binary form must reproduce the above copyright
           notice, this list of conditions and the following disclaimer in the
           documentation and/or other materials provided with the distribution.

        THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
        "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
        LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
        A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
        OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
        SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
        LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
        DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
        THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
        (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
        OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""

import os
import re
import sys


def getv(dict, key, d=None):
    """Lookup key in dict and return value or the supplied default."""
    if key in dict:
        return dict[key]
    return d


def resolve(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Return an absolute path, resolving via cwd or last_dir if needed.

    A trailing '/.' is dropped first.  Absolute paths are returned as is,
    '.' and './x' are resolved against cwd without touching the file
    system.  Any other relative path is tried against last_dir and then
    cwd, and the first candidate that exists is returned.

    Returns None if the path cannot be resolved.
    """
    if path.endswith('/.'):
        path = path[0:-2]
    if len(path) > 0 and path[0] == '/':
        return path
    if path == '.':
        return cwd
    if path.startswith('./'):
        return cwd + path[1:]
    if last_dir == cwd:
        # no point looking in the same place twice
        last_dir = None
    for d in [last_dir, cwd]:
        if not d:
            continue
        if path == '..':
            # parent of the first usable directory
            dw = d.split('/')
            p = '/'.join(dw[:-1])
            if not p:
                p = '/'
            return p
        p = '/'.join([d, path])
        if debug > 2:
            print("looking for:", p, end=' ', file=debug_out)
        if not os.path.exists(p):
            if debug > 2:
                print("nope", file=debug_out)
            continue
        if debug > 2:
            print("found:", p, file=debug_out)
        return p
    return None


def cleanpath(path):
    """cleanup path without using realpath(3)

    Empty and '.' components are dropped and 'x/..' pairs are collapsed.
    Processing stops at a '..' that has nothing left to pop.
    """
    if path.startswith('/'):
        r = '/'
    else:
        r = ''
    p = []
    w = path.split('/')
    for d in w:
        if not d or d == '.':
            continue
        if d == '..':
            try:
                p.pop()
                continue
            except IndexError:
                # tried to climb above the start of the path
                break
        p.append(d)

    return r + '/'.join(p)


def abspath(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Return an absolute path, resolving via cwd or last_dir if needed.
    this gets called a lot, so we try to avoid calling realpath.

    If resolve() fails the original path is kept.  The result is only
    passed through cleanpath() when it looks like it needs it.
    """
    rpath = resolve(path, cwd, last_dir, debug, debug_out)
    if rpath:
        path = rpath
    if (path.find('/') < 0 or
        path.find('./') > 0 or
        path.endswith('/..')):
        path = cleanpath(path)
    return path


def sort_unique(list, cmp=None, key=None, reverse=False):
    """Sort list in place and return a new list without adjacent duplicates.

    cmp is an optional old style comparison function; it is adapted via
    functools.cmp_to_key() so that this works on Python 3 as well, where
    list.sort() no longer accepts cmp or positional arguments.
    If both cmp and key are given, cmp is applied to the extracted keys
    (as Python 2 did).
    """
    if cmp is not None:
        from functools import cmp_to_key
        cmp_key = cmp_to_key(cmp)
        if key is None:
            sort_key = cmp_key
        else:
            user_key = key
            sort_key = lambda x: cmp_key(user_key(x))
    else:
        sort_key = key
    list.sort(key=sort_key, reverse=reverse)
    nl = []
    le = None
    for e in list:
        if e == le:
            continue
        le = e
        nl.append(e)
    return nl


def add_trims(x):
    """Return the variants of x (with and without surrounding '/')
    that may need trimming from the end of an objroot."""
    return ['/' + x + '/',
            '/' + x,
            x + '/',
            x]


class MetaFile:
    """class to parse meta files generated by bmake."""

    # NOTE: these are class attributes; the mutable ones (lists, dict)
    # are shared by all instances, which is how main() accumulates
    # the results of several meta files and reports them via the last
    # instance created.
    conf = None
    dirdep_re = None
    host_target = None
    srctops = []
    objroots = []
    excludes = []
    seen = {}
    obj_deps = []
    src_deps = []
    file_deps = []

    def __init__(self, name, conf={}):
        """if name is set we will parse it now.
        conf can have the following keys:

        SRCTOPS list of tops of the src tree(s).

        CURDIR  the src directory 'bmake' was run from.

        RELDIR  the relative path from SRCTOP to CURDIR

        MACHINE the machine we built for.
                set to 'none' if we are not cross-building.
                More specifically if machine cannot be deduced from objdirs.

        TARGET_SPEC
                Sometimes MACHINE isn't enough.

        HOST_TARGET
                when we build for the pseudo machine 'host'
                the object tree uses HOST_TARGET rather than MACHINE.

        OBJROOTS a list of the common prefix for all obj dirs it might
                end in '/' or '-'.

        DPDEPS  names an optional file to which per file dependencies
                will be appended.
                For example if 'some/path/foo.h' is read from SRCTOP
                then 'DPDEPS_some/path/foo.h +=' "RELDIR" is output.
                This can allow 'bmake' to learn all the dirs within
                the tree that depend on 'foo.h'

        EXCLUDES
                A list of paths to ignore.
                ccache(1) can otherwise be trouble.

        debug   desired debug level

        debug_out open file to send debug output to (sys.stderr)

        """

        self.name = name
        self.debug = getv(conf, 'debug', 0)
        self.debug_out = getv(conf, 'debug_out', sys.stderr)

        self.machine = getv(conf, 'MACHINE', '')
        self.machine_arch = getv(conf, 'MACHINE_ARCH', '')
        self.target_spec = getv(conf, 'TARGET_SPEC', '')
        self.curdir = getv(conf, 'CURDIR')
        self.reldir = getv(conf, 'RELDIR')
        self.dpdeps = getv(conf, 'DPDEPS')
        self.line = 0

        if not self.conf:
            # some of the steps below we want to do only once
            self.conf = conf
            self.host_target = getv(conf, 'HOST_TARGET')
            for srctop in getv(conf, 'SRCTOPS', []):
                if not srctop:
                    continue            # nothing useful to record
                if srctop[-1] != '/':
                    srctop += '/'
                if not srctop in self.srctops:
                    self.srctops.append(srctop)
                # the tree may be reached via a symlink, record both
                _srctop = os.path.realpath(srctop)
                if _srctop[-1] != '/':
                    _srctop += '/'
                if not _srctop in self.srctops:
                    self.srctops.append(_srctop)

            trim_list = add_trims(self.machine)
            if self.machine == 'host' and self.host_target:
                trim_list += add_trims(self.host_target)
            if self.target_spec:
                trim_list += add_trims(self.target_spec)

            for objroot in getv(conf, 'OBJROOTS', []):
                for e in trim_list:
                    # an empty trim (MACHINE not set) matches everything
                    # and objroot[0:-0] would leave us with nothing
                    if e and objroot.endswith(e):
                        # this is not what we want - fix it
                        objroot = objroot[0:-len(e)]

                if not objroot:
                    continue            # nothing left after trimming
                if objroot[-1] != '/':
                    objroot += '/'
                if not objroot in self.objroots:
                    self.objroots.append(objroot)
                    _objroot = os.path.realpath(objroot)
                    if _objroot[-1] != '/':
                        _objroot += '/'
                    if not _objroot in self.objroots:
                        self.objroots.append(_objroot)

            # we want the longest match
            self.srctops.sort(reverse=True)
            self.objroots.sort(reverse=True)

            self.excludes = getv(conf, 'EXCLUDES', [])

            if self.debug:
                print("host_target=", self.host_target, file=self.debug_out)
                print("srctops=", self.srctops, file=self.debug_out)
                print("objroots=", self.objroots, file=self.debug_out)
                print("excludes=", self.excludes, file=self.debug_out)

            self.dirdep_re = re.compile(r'([^/]+)/(.+)')

        if self.dpdeps and not self.reldir:
            if self.debug:
                print("need reldir:", end=' ', file=self.debug_out)
            if self.curdir:
                srctop = self.find_top(self.curdir, self.srctops)
                if srctop:
                    self.reldir = self.curdir.replace(srctop,'')
                    if self.debug:
                        print(self.reldir, file=self.debug_out)
            if not self.reldir:
                self.dpdeps = None      # we cannot do it?

        self.cwd = os.getcwd()          # make sure this is initialized
        self.last_dir = self.cwd

        if name:
            self.try_parse()

    def reset(self):
        """reset state if we are being passed meta files from multiple directories."""
        self.seen = {}
        self.obj_deps = []
        self.src_deps = []
        self.file_deps = []

    def dirdeps(self, sep='\n'):
        """return DIRDEPS"""
        return sep.strip() + sep.join(self.obj_deps)

    def src_dirdeps(self, sep='\n'):
        """return SRC_DIRDEPS"""
        return sep.strip() + sep.join(self.src_deps)

    def file_depends(self, out=None):
        """Append DPDEPS_${file} += ${RELDIR}
        for each file we saw, to the output file.

        out is an open text file, None means sys.stdout.
        Nothing is written if RELDIR is not known.
        """
        if not self.reldir:
            return None
        for f in sort_unique(self.file_deps):
            print('DPDEPS_%s += %s' % (f, self.reldir), file=out)
        # these entries provide for reverse DIRDEPS lookup
        for f in self.obj_deps:
            print('DEPDIRS_%s += %s' % (f, self.reldir), file=out)

    def seenit(self, dir):
        """remember that we have seen dir."""
        self.seen[dir] = 1

    def add(self, list, data, clue=''):
        """add data to list if it isn't already there."""
        if data not in list:
            list.append(data)
            if self.debug:
                print("%s: %sAdd: %s" % (self.name, clue, data), file=self.debug_out)

    def find_top(self, path, list):
        """the logical tree may be split across multiple trees

        Return the first entry of list that path starts with, or None.
        """
        for top in list:
            if path.startswith(top):
                if self.debug > 2:
                    print("found in", top, file=self.debug_out)
                return top
        return None

    def find_obj(self, objroot, dir, path, input):
        """return path within objroot, taking care of .dirdep files"""
        ddep = None
        for ddepf in [path + '.dirdep', dir + '/.dirdep']:
            if not ddep and os.path.exists(ddepf):
                # only the first line matters; do not leak the handle
                with open(ddepf, 'r') as df:
                    ddep = df.readline().strip('# \n')
                if self.debug > 1:
                    print("found %s: %s\n" % (ddepf, ddep), file=self.debug_out)
                # strip the '.machine' (or '.target_spec') suffix
                if ddep.endswith(self.machine):
                    ddep = ddep[0:-(1+len(self.machine))]
                elif self.target_spec and ddep.endswith(self.target_spec):
                    ddep = ddep[0:-(1+len(self.target_spec))]

        if not ddep:
            # no .dirdeps, so remember that we've seen the raw input
            self.seenit(input)
            self.seenit(dir)
            if self.machine == 'none':
                if dir.startswith(objroot):
                    return dir.replace(objroot,'')
                return None
            # expect <machine>/<reldir> below objroot
            m = self.dirdep_re.match(dir.replace(objroot,''))
            if m:
                ddep = m.group(2)
                dmachine = m.group(1)
                if dmachine != self.machine:
                    if not (self.machine == 'host' and
                            dmachine == self.host_target):
                        if self.debug > 2:
                            print("adding .%s to %s" % (dmachine, ddep), file=self.debug_out)
                        ddep += '.' + dmachine

        return ddep

    def try_parse(self, name=None, file=None):
        """give file and line number causing exception"""
        try:
            self.parse(name, file)
        except:
            # give a useful clue
            print('{}:{}: '.format(self.name, self.line), end=' ', file=sys.stderr)
            raise

    def parse(self, name=None, file=None):
        """A meta file looks like:

        # Meta data file "path"
        CMD "command-line"
        CWD "cwd"
        TARGET "target"
        -- command output --
        -- filemon acquired metadata --
        # buildmon version 3
        V 3
        C "pid" "cwd"
        E "pid" "path"
        F "pid" "child"
        R "pid" "path"
        W "pid" "path"
        X "pid" "status"
        D "pid" "path"
        L "pid" "src" "target"
        M "pid" "old" "new"
        S "pid" "path"
        # Bye bye

        We go to some effort to avoid processing a dependency more than once.
        Of the above record types only C,E,F,L,R,V and W are of interest.

        name    if set, replaces self.name.

        file    if set, an iterable of lines to parse instead of
                opening self.name; it is not closed by us.
        """

        version = 0                     # unknown
        if name:
            self.name = name
        if file:
            f = file
            cwd = self.last_dir = self.cwd
        else:
            f = open(self.name, 'r')
            cwd = self.cwd              # until we see CWD or a pid
        try:
            skip = True
            pid_cwd = {}
            pid_last_dir = {}
            last_pid = 0

            self.line = 0
            if self.curdir:
                self.seenit(self.curdir)    # we ignore this

            interesting = 'CEFLRV'
            for line in f:
                self.line += 1
                # ignore anything we don't care about
                if not line[0] in interesting:
                    continue
                if self.debug > 2:
                    print("input:", line, end=' ', file=self.debug_out)
                w = line.split()

                if skip:
                    # everything before the 'V' record is header,
                    # of which only CWD matters
                    if w[0] == 'V':
                        skip = False
                        version = int(w[1])
                        # NOTE: disabled upstream:
                        # if version < 4:
                        #     # we cannot ignore 'W' records
                        #     # as they may be 'rw'
                        #     interesting += 'W'
                    elif w[0] == 'CWD':
                        self.cwd = cwd = self.last_dir = w[1]
                        self.seenit(cwd)    # ignore this
                        if self.debug:
                            print("%s: CWD=%s" % (self.name, cwd), file=self.debug_out)
                    continue

                pid = int(w[1])
                if pid != last_pid:
                    # switch to the state we last had for this pid
                    if last_pid:
                        pid_last_dir[last_pid] = self.last_dir
                    cwd = getv(pid_cwd, pid, self.cwd)
                    self.last_dir = getv(pid_last_dir, pid, self.cwd)
                    last_pid = pid

                # process operations
                if w[0] == 'F':
                    # child inherits cwd of parent
                    npid = int(w[2])
                    pid_cwd[npid] = cwd
                    pid_last_dir[npid] = cwd
                    last_pid = npid
                    continue
                elif w[0] == 'C':
                    cwd = abspath(w[2], cwd, None, self.debug, self.debug_out)
                    if cwd.endswith('/.'):
                        cwd = cwd[0:-2]
                    self.last_dir = pid_last_dir[pid] = cwd
                    pid_cwd[pid] = cwd
                    if self.debug > 1:
                        print("cwd=", cwd, file=self.debug_out)
                    continue

                if w[2] in self.seen:
                    if self.debug > 2:
                        print("seen:", w[2], file=self.debug_out)
                    continue
                # file operations
                if w[0] in 'ML':
                    # these are special, treat src as read and
                    # target as write.
                    # w[1] is the pid, src and target follow it.
                    self.parse_path(w[2].strip("'"), cwd, 'R', w)
                    if len(w) > 3:
                        self.parse_path(w[3].strip("'"), cwd, 'W', w)
                    continue
                elif w[0] in 'ERWS':
                    path = w[2]
                    self.parse_path(path, cwd, w[0], w)
        finally:
            # only close what we opened, even if parsing blew up
            if not file:
                f.close()

    def is_src(self, base, dir, rdir):
        """is base in srctop

        dir and rdir (the realpath of dir, if different) are both tried.
        On a match the src (and optionally file) dependency is recorded
        and True is returned.
        """
        for dir in [dir,rdir]:
            if not dir:
                continue
            path = '/'.join([dir,base])
            srctop = self.find_top(path, self.srctops)
            if srctop:
                if self.dpdeps:
                    self.add(self.file_deps, path.replace(srctop,''), 'file')
                self.add(self.src_deps, dir.replace(srctop,''), 'src')
                self.seenit(dir)
                return True
        return False

    def parse_path(self, path, cwd, op=None, w=[]):
        """look at a path for the op specified

        path    the path from the record (quotes already removed).

        cwd     current directory of the process that issued the op.

        op      the record type, defaults to w[0].

        w       the split record; w[2] is the raw path we remember
                as seen (path itself is used if w is too short).
        """

        if not op:
            op = w[0]

        # the raw input as it appeared in the record
        if len(w) > 2:
            raw = w[2]
        else:
            raw = path

        # we are never interested in .dirdep files as dependencies
        if path.endswith('.dirdep'):
            return
        for p in self.excludes:
            if p and path.startswith(p):
                if self.debug > 2:
                    print("exclude:", p, path, file=self.debug_out)
                return
        # we don't want to resolve the last component if it is
        # a symlink
        path = resolve(path, cwd, self.last_dir, self.debug, self.debug_out)
        if not path:
            return
        dir,base = os.path.split(path)
        if dir in self.seen:
            if self.debug > 2:
                print("seen:", dir, file=self.debug_out)
            return
        # we can have a path in an objdir which is a link
        # to the src dir, we may need to add dependencies for each
        dir = abspath(dir, cwd, self.last_dir, self.debug, self.debug_out)
        rdir = os.path.realpath(dir)
        if rdir == dir:
            rdir = None
        # now put path back together
        path = '/'.join([dir,base])
        if self.debug > 1:
            print("raw=%s rdir=%s dir=%s path=%s" % (raw, rdir, dir, path), file=self.debug_out)
        if op in 'RWS':
            if path in [self.last_dir, cwd, self.cwd, self.curdir]:
                if self.debug > 1:
                    print("skipping:", path, file=self.debug_out)
                return
            if os.path.isdir(path):
                # a directory is only a clue for later relative paths
                if op in 'RW':
                    self.last_dir = path
                if self.debug > 1:
                    print("ldir=", self.last_dir, file=self.debug_out)
                return

        if op in 'ERW':
            # finally, we get down to it
            if dir == self.cwd or dir == self.curdir:
                return
            if self.is_src(base, dir, rdir):
                self.seenit(raw)
                if not rdir:
                    return

            objroot = None
            for dir in [dir,rdir]:
                if not dir:
                    continue
                objroot = self.find_top(dir, self.objroots)
                if objroot:
                    break
            if objroot:
                ddep = self.find_obj(objroot, dir, path, raw)
                if ddep:
                    self.add(self.obj_deps, ddep, 'obj')
                    if self.dpdeps and objroot.endswith('/stage/'):
                        sp = '/'.join(path.replace(objroot,'').split('/')[1:])
                        self.add(self.file_deps, sp, 'file')
            else:
                # don't waste time looking again
                self.seenit(raw)
                self.seenit(dir)


def main(argv, klass=MetaFile, xopts='', xoptf=None):
    """Simple driver for class MetaFile.

    Usage:
        script [options] [key=value ...] "meta" ...

    Options and key=value pairs contribute to the
    dictionary passed to MetaFile.

    -S "SRCTOP"
                add "SRCTOP" to the "SRCTOPS" list.

    -C "CURDIR"

    -O "OBJROOT"
                add "OBJROOT" to the "OBJROOTS" list.

    -m "MACHINE"

    -a "MACHINE_ARCH"

    -H "HOST_TARGET"

    -D "DPDEPS"

    -d          bumps debug level

    Other options handled here: -R "RELDIR", -T "TARGET_SPEC",
    -X "EXCLUDE" (added to the "EXCLUDES" list) and -q (no output).

    xopts       extra getopt(3) option letters, passed to xoptf.

    xoptf       called as xoptf(opt, arg, conf) for options
                not handled here.

    Returns the last klass instance created (None if there was none).
    """
    import getopt

    # import Psyco if we can
    # it can speed things up quite a bit
    have_psyco = 0
    try:
        import psyco
        psyco.full()
        have_psyco = 1
    except Exception:
        # purely an optimization, carry on without it
        pass

    conf = {
        'SRCTOPS': [],
        'OBJROOTS': [],
        'EXCLUDES': [],
        }

    # NOTE: the first variable missing from the environment
    # ends this (the remainder are then not consulted).
    try:
        machine = os.environ['MACHINE']
        if machine:
            conf['MACHINE'] = machine
        machine_arch = os.environ['MACHINE_ARCH']
        if machine_arch:
            conf['MACHINE_ARCH'] = machine_arch
        srctop = os.environ['SB_SRC']
        if srctop:
            conf['SRCTOPS'].append(srctop)
        objroot = os.environ['SB_OBJROOT']
        if objroot:
            conf['OBJROOTS'].append(objroot)
    except KeyError:
        pass

    debug = 0
    output = True

    opts, args = getopt.getopt(argv[1:], 'a:dS:C:O:R:m:D:H:qT:X:' + xopts)
    for o, a in opts:
        if o == '-a':
            conf['MACHINE_ARCH'] = a
        elif o == '-d':
            debug += 1
        elif o == '-q':
            output = False
        elif o == '-H':
            conf['HOST_TARGET'] = a
        elif o == '-S':
            if a not in conf['SRCTOPS']:
                conf['SRCTOPS'].append(a)
        elif o == '-C':
            conf['CURDIR'] = a
        elif o == '-O':
            if a not in conf['OBJROOTS']:
                conf['OBJROOTS'].append(a)
        elif o == '-R':
            conf['RELDIR'] = a
        elif o == '-D':
            conf['DPDEPS'] = a
        elif o == '-m':
            conf['MACHINE'] = a
        elif o == '-T':
            conf['TARGET_SPEC'] = a
        elif o == '-X':
            if a not in conf['EXCLUDES']:
                conf['EXCLUDES'].append(a)
        elif xoptf:
            xoptf(o, a, conf)

    conf['debug'] = debug

    # get any var=val assignments
    eaten = []
    for a in args:
        if a.find('=') > 0:
            # only the first '=' separates, the value may contain more
            k,v = a.split('=', 1)
            if k in ['SRCTOP','OBJROOT','SRCTOPS','OBJROOTS']:
                if k == 'SRCTOP':
                    k = 'SRCTOPS'
                elif k == 'OBJROOT':
                    k = 'OBJROOTS'
                if v not in conf[k]:
                    conf[k].append(v)
            else:
                conf[k] = v
            eaten.append(a)
            continue
        break

    for a in eaten:
        args.remove(a)

    debug_out = getv(conf, 'debug_out', sys.stderr)

    if debug:
        print("config:", file=debug_out)
        print("psyco=", have_psyco, file=debug_out)
        for k,v in list(conf.items()):
            print("%s=%s" % (k,v), file=debug_out)

    m = None
    for a in args:
        if a.endswith('.meta'):
            if not os.path.exists(a):
                continue
            m = klass(a, conf)
        elif a.startswith('@'):
            # there can actually multiple files per line
            with open(a[1:]) as lf:
                for line in lf:
                    for f in line.strip().split():
                        if not os.path.exists(f):
                            continue
                        m = klass(f, conf)

    if output and m:
        print(m.dirdeps())

        print(m.src_dirdeps('\nsrc:'))

        dpdeps = getv(conf, 'DPDEPS')
        if dpdeps:
            # text mode: file_depends() uses print()
            with open(dpdeps, 'w') as df:
                m.file_depends(df)

    return m


if __name__ == '__main__':
    try:
        main(sys.argv)
    except:
        # yes, this goes to stdout
        print("ERROR: ", sys.exc_info()[1])
        raise