#!/usr/bin/env python

from __future__ import print_function

"""
This script parses each "meta" file and extracts the
information needed to deduce build and src dependencies.

It works much the same as the original shell script, but is
*much* more efficient.

The parsing work is handled by the class MetaFile.
We only pay attention to a subset of the information in the
"meta" files.  Specifically:

'CWD'   to initialize our notion.

'C'     to track chdir(2) on a per process basis

'R'     files read are what we really care about.
        directories read, provide a clue to resolving
        subsequent relative paths.  That is if we cannot find
        them relative to 'cwd', we check relative to the last
        dir read.

'W'     files opened for write or read-write,
        for filemon V3 and earlier.

'E'     files executed.

'L'     files linked

'V'     the filemon version, this record is used as a clue
        that we have reached the interesting bit.

"""

"""
RCSid:
        $Id: meta2deps.py,v 1.22 2016/12/12 19:07:42 sjg Exp $

        Copyright (c) 2011-2013, Juniper Networks, Inc.
        All rights reserved.

        Redistribution and use in source and binary forms, with or without
        modification, are permitted provided that the following conditions
        are met:
        1. Redistributions of source code must retain the above copyright
           notice, this list of conditions and the following disclaimer.
        2. Redistributions in binary form must reproduce the above copyright
           notice, this list of conditions and the following disclaimer in the
           documentation and/or other materials provided with the distribution.

        THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
        "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
        LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
        A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
        OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
        SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
        LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
        DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
        THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
        (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
        OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""

import os, re, sys

def getv(dict, key, d=None):
    """Lookup key in dict and return value or the supplied default."""
    if key in dict:
        return dict[key]
    return d

def resolve(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Return an absolute path, resolving via cwd or last_dir if needed.

    A trailing '/.' is dropped first.  Absolute paths are returned
    as is, '.' yields cwd and './x' yields cwd + '/x'; none of these
    touch the filesystem.  Any other relative path is looked for
    under last_dir and then cwd, and the first candidate that exists
    is returned.  Returns None if no candidate exists.
    """
    if path.endswith('/.'):
        path = path[0:-2]
    if len(path) > 0 and path[0] == '/':
        return path
    if path == '.':
        return cwd
    if path.startswith('./'):
        return cwd + path[1:]
    if last_dir == cwd:
        # no point checking the same directory twice
        last_dir = None
    for d in [last_dir, cwd]:
        if not d:
            continue
        p = '/'.join([d,path])
        if debug > 2:
            print("looking for:", p, end=' ', file=debug_out)
        if not os.path.exists(p):
            if debug > 2:
                print("nope", file=debug_out)
            p = None
            continue
        if debug > 2:
            print("found:", p, file=debug_out)
        return p
    return None

def abspath(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Return an absolute path, resolving via cwd or last_dir if needed.
    this gets called a lot, so we try to avoid calling realpath
    until we know we have something.

    If resolve() cannot find the path, the path is used as given.
    realpath is only applied when the result has no '/', contains
    './' past its first character, ends in '/..' or is a symlink.
    """
    rpath = resolve(path, cwd, last_dir, debug, debug_out)
    if rpath:
        path = rpath
    if (path.find('/') < 0 or
        path.find('./') > 0 or
        path.endswith('/..') or
        os.path.islink(path)):
        return os.path.realpath(path)
    return path

def sort_unique(list, cmp=None, key=None, reverse=False):
    """
    Sort list in place and return a new list without duplicates.

    cmp, key and reverse have the meaning they had for the Python 2
    list.sort(); cmp is adapted via functools.cmp_to_key so that
    this also works on Python 3, where list.sort() has no cmp.
    Only adjacent equal entries are dropped, which after sorting
    means all duplicates.
    """
    if cmp is not None:
        import functools
        cmp_key = functools.cmp_to_key(cmp)
        if key is None:
            sort_key = cmp_key
        else:
            # Python 2 applied cmp to the keyed values
            user_key = key
            sort_key = lambda e: cmp_key(user_key(e))
    else:
        sort_key = key
    list.sort(key=sort_key, reverse=reverse)
    nl = []
    for e in list:
        # compare with the last entry kept, not a stale marker
        if nl and e == nl[-1]:
            continue
        nl.append(e)
    return nl

def add_trims(x):
    """Return the variants of x (with and without surrounding '/')
    that MetaFile strips from the end of an OBJROOT."""
    return ['/' + x + '/',
            '/' + x,
            x + '/',
            x]

class MetaFile:
    """class to parse meta files generated by bmake."""

    # NOTE: these are class attributes.  The lists and the dict are
    # mutated in place (append / item assignment), so every instance
    # shares them until reset() rebinds them on an instance.
    # main() relies on that: it creates one instance per meta file
    # and reports the dependencies via the last one.
    conf = None
    dirdep_re = None
    host_target = None
    srctops = []
    objroots = []
    excludes = []
    seen = {}
    obj_deps = []
    src_deps = []
    file_deps = []

    def __init__(self, name, conf=None):
        """if name is set we will parse it now.
        conf can have the following keys:

        SRCTOPS list of tops of the src tree(s).

        CURDIR  the src directory 'bmake' was run from.

        RELDIR  the relative path from SRCTOP to CURDIR

        MACHINE the machine we built for.
                set to 'none' if we are not cross-building.
                More specifically if machine cannot be deduced from objdirs.

        MACHINE_ARCH
                stored as self.machine_arch.

        TARGET_SPEC
                Sometimes MACHINE isn't enough.

        HOST_TARGET
                when we build for the pseudo machine 'host'
                the object tree uses HOST_TARGET rather than MACHINE.

        OBJROOTS a list of the common prefix for all obj dirs it might
                end in '/' or '-'.

        DPDEPS  names an optional file to which per file dependencies
                will be appended.
                For example if 'some/path/foo.h' is read from SRCTOP
                then 'DPDEPS_some/path/foo.h +=' "RELDIR" is output.
                This can allow 'bmake' to learn all the dirs within
                the tree that depend on 'foo.h'

        EXCLUDES
                A list of paths to ignore.
                ccache(1) can otherwise be trouble.

        debug   desired debug level

        debug_out open file to send debug output to (sys.stderr)

        """

        if conf is None:
            conf = {}

        self.name = name
        self.debug = getv(conf, 'debug', 0)
        self.debug_out = getv(conf, 'debug_out', sys.stderr)

        self.machine = getv(conf, 'MACHINE', '')
        self.machine_arch = getv(conf, 'MACHINE_ARCH', '')
        self.target_spec = getv(conf, 'TARGET_SPEC', '')
        self.curdir = getv(conf, 'CURDIR')
        self.reldir = getv(conf, 'RELDIR')
        self.dpdeps = getv(conf, 'DPDEPS')
        self.line = 0

        if not self.conf:
            # some of the steps below we want to do only once
            # NOTE: the assignment below binds conf on the instance,
            # so a fresh instance still gets here; the "not in"
            # checks keep the shared lists free of duplicates.
            self.conf = conf
            self.host_target = getv(conf, 'HOST_TARGET')
            for srctop in getv(conf, 'SRCTOPS', []):
                if not srctop:
                    continue            # ignore empty entries
                if srctop[-1] != '/':
                    srctop += '/'
                if not srctop in self.srctops:
                    self.srctops.append(srctop)
                # realpath drops the trailing '/'
                _srctop = os.path.realpath(srctop)
                if _srctop[-1] != '/':
                    _srctop += '/'
                if not _srctop in self.srctops:
                    self.srctops.append(_srctop)

            trim_list = add_trims(self.machine)
            if self.machine == 'host' and self.host_target:
                trim_list += add_trims(self.host_target)
            if self.target_spec:
                trim_list += add_trims(self.target_spec)

            for objroot in getv(conf, 'OBJROOTS', []):
                if not objroot:
                    continue            # ignore empty entries
                for e in trim_list:
                    # an empty trim (MACHINE unset) matches everything
                    # and objroot[0:-0] would wipe out objroot
                    if e and objroot.endswith(e):
                        # this is not what we want - fix it
                        objroot = objroot[0:-len(e)]

                if not objroot.endswith('/'):
                    objroot += '/'
                if not objroot in self.objroots:
                    self.objroots.append(objroot)
                # realpath drops the trailing '/'
                _objroot = os.path.realpath(objroot)
                if _objroot[-1] != '/':
                    _objroot += '/'
                if not _objroot in self.objroots:
                    self.objroots.append(_objroot)

            # we want the longest match
            self.srctops.sort(reverse=True)
            self.objroots.sort(reverse=True)

            self.excludes = getv(conf, 'EXCLUDES', [])

            if self.debug:
                print("host_target=", self.host_target, file=self.debug_out)
                print("srctops=", self.srctops, file=self.debug_out)
                print("objroots=", self.objroots, file=self.debug_out)
                print("excludes=", self.excludes, file=self.debug_out)

            self.dirdep_re = re.compile(r'([^/]+)/(.+)')

        if self.dpdeps and not self.reldir:
            if self.debug:
                print("need reldir:", end=' ', file=self.debug_out)
            if self.curdir:
                srctop = self.find_top(self.curdir, self.srctops)
                if srctop:
                    self.reldir = self.curdir.replace(srctop,'')
                    if self.debug:
                        print(self.reldir, file=self.debug_out)
            if not self.reldir:
                self.dpdeps = None      # we cannot do it?

        self.cwd = os.getcwd()          # make sure this is initialized
        self.last_dir = self.cwd

        if name:
            self.try_parse()

    def reset(self):
        """reset state if we are being passed meta files from multiple directories."""
        # rebinding gives this instance its own containers
        self.seen = {}
        self.obj_deps = []
        self.src_deps = []
        self.file_deps = []

    def dirdeps(self, sep='\n'):
        """return DIRDEPS"""
        return sep.strip() + sep.join(self.obj_deps)

    def src_dirdeps(self, sep='\n'):
        """return SRC_DIRDEPS"""
        return sep.strip() + sep.join(self.src_deps)

    def file_depends(self, out=None):
        """Append DPDEPS_${file} += ${RELDIR}
        for each file we saw, to the output file.

        out is passed to print() as file=, so None means stdout.
        Nothing is output (and None is returned) without a reldir."""
        if not self.reldir:
            return None
        for f in sort_unique(self.file_deps):
            print('DPDEPS_%s += %s' % (f, self.reldir), file=out)
        # these entries provide for reverse DIRDEPS lookup
        for f in self.obj_deps:
            print('DEPDIRS_%s += %s' % (f, self.reldir), file=out)

    def seenit(self, dir):
        """remember that we have seen dir."""
        self.seen[dir] = 1

    def add(self, list, data, clue=''):
        """add data to list if it isn't already there."""
        if data not in list:
            list.append(data)
            if self.debug:
                print("%s: %sAdd: %s" % (self.name, clue, data), file=self.debug_out)

    def find_top(self, path, list):
        """the logical tree may be split across multiple trees

        Return the first entry of list that path starts with,
        or None."""
        for top in list:
            if path.startswith(top):
                if self.debug > 2:
                    print("found in", top, file=self.debug_out)
                return top
        return None

    def find_obj(self, objroot, dir, path, input):
        """return path within objroot, taking care of .dirdep files

        The first line of path + '.dirdep' or dir + '/.dirdep'
        (whichever is found first) names the directory, a trailing
        MACHINE or TARGET_SPEC qualifier is removed from it.
        Without a .dirdep file the result is derived from dir:
        the first component below objroot is taken as the machine
        and appended as '.machine' when it is not the one we are
        building for.  May return None."""
        ddep = None
        for ddepf in [path + '.dirdep', dir + '/.dirdep']:
            if not ddep and os.path.exists(ddepf):
                with open(ddepf, 'r') as ddf:
                    ddep = ddf.readline().strip('# \n')
                if self.debug > 1:
                    print("found %s: %s\n" % (ddepf, ddep), file=self.debug_out)
                # the 1+ also removes the separator before the suffix.
                # An empty machine would match any ddep and cost us
                # its last character.
                if self.machine and ddep.endswith(self.machine):
                    ddep = ddep[0:-(1+len(self.machine))]
                elif self.target_spec and ddep.endswith(self.target_spec):
                    ddep = ddep[0:-(1+len(self.target_spec))]

        if not ddep:
            # no .dirdeps, so remember that we've seen the raw input
            self.seenit(input)
            self.seenit(dir)
            if self.machine == 'none':
                if dir.startswith(objroot):
                    return dir.replace(objroot,'')
                return None
            m = self.dirdep_re.match(dir.replace(objroot,''))
            if m:
                ddep = m.group(2)
                dmachine = m.group(1)
                if dmachine != self.machine:
                    if not (self.machine == 'host' and
                            dmachine == self.host_target):
                        if self.debug > 2:
                            print("adding .%s to %s" % (dmachine, ddep), file=self.debug_out)
                        ddep += '.' + dmachine

        return ddep

    def try_parse(self, name=None, file=None):
        """give file and line number causing exception"""
        try:
            self.parse(name, file)
        except:
            # give a useful clue
            # (bare except is ok here, we always re-raise)
            print('{}:{}: '.format(self.name, self.line), end=' ', file=sys.stderr)
            raise

    def parse(self, name=None, file=None):
        """A meta file looks like:

        # Meta data file "path"
        CMD "command-line"
        CWD "cwd"
        TARGET "target"
        -- command output --
        -- filemon acquired metadata --
        # buildmon version 3
        V 3
        C "pid" "cwd"
        E "pid" "path"
        F "pid" "child"
        R "pid" "path"
        W "pid" "path"
        X "pid" "status"
        D "pid" "path"
        L "pid" "src" "target"
        M "pid" "old" "new"
        S "pid" "path"
        # Bye bye

        We go to some effort to avoid processing a dependency more than once.
        Of the above record types only C,E,F,L,R,V and W are of interest.

        name    if set, replaces self.name.

        file    if set, an open file (any iterable of lines) to read
                rather than opening self.name; it is not closed here.
        """

        version = 0                     # unknown
        if name:
            self.name = name
        if file:
            f = file
            cwd = self.last_dir = self.cwd
        else:
            f = open(self.name, 'r')
        skip = True
        pid_cwd = {}
        pid_last_dir = {}
        last_pid = 0

        self.line = 0
        if self.curdir:
            self.seenit(self.curdir)    # we ignore this

        interesting = 'CEFLRV'
        try:
            for line in f:
                self.line += 1
                # ignore anything we don't care about
                if not line[0] in interesting:
                    continue
                if self.debug > 2:
                    print("input:", line, end=' ', file=self.debug_out)
                w = line.split()

                if skip:
                    # still in the header, wait for the 'V' record
                    if w[0] == 'V':
                        skip = False
                        version = int(w[1])
                        # NOTE: the original carries the following,
                        # disabled, so 'W' records are not looked at:
                        #
                        # if version < 4:
                        #     # we cannot ignore 'W' records
                        #     # as they may be 'rw'
                        #     interesting += 'W'
                    elif w[0] == 'CWD':
                        self.cwd = cwd = self.last_dir = w[1]
                        self.seenit(cwd)    # ignore this
                        if self.debug:
                            print("%s: CWD=%s" % (self.name, cwd), file=self.debug_out)
                    continue

                pid = int(w[1])
                if pid != last_pid:
                    # switch to the cwd and last_dir of this process
                    if last_pid:
                        pid_last_dir[last_pid] = self.last_dir
                    cwd = getv(pid_cwd, pid, self.cwd)
                    self.last_dir = getv(pid_last_dir, pid, self.cwd)
                    last_pid = pid

                # process operations
                if w[0] == 'F':
                    # the child starts out where its parent is
                    npid = int(w[2])
                    pid_cwd[npid] = cwd
                    pid_last_dir[npid] = cwd
                    last_pid = npid
                    continue
                elif w[0] == 'C':
                    cwd = abspath(w[2], cwd, None, self.debug, self.debug_out)
                    if cwd.endswith('/.'):
                        cwd = cwd[0:-2]
                    self.last_dir = pid_last_dir[pid] = cwd
                    pid_cwd[pid] = cwd
                    if self.debug > 1:
                        print("cwd=", cwd, file=self.debug_out)
                    continue

                if w[2] in self.seen:
                    if self.debug > 2:
                        print("seen:", w[2], file=self.debug_out)
                    continue
                # file operations
                if w[0] in 'ML':
                    # these are special, treat src as read and
                    # target as write
                    # w[1] is the pid, the paths are w[2] and w[3]
                    self.parse_path(w[2].strip("'"), cwd, 'R', w)
                    if len(w) > 3:
                        self.parse_path(w[3].strip("'"), cwd, 'W', w)
                    continue
                elif w[0] in 'ERWS':
                    path = w[2]
                    self.parse_path(path, cwd, w[0], w)
        finally:
            # only close what we opened, even if we are bailing out
            if not file:
                f.close()

    def parse_path(self, path, cwd, op=None, w=()):
        """look at a path for the op specified

        path    the path from the record.

        cwd     the cwd of the process the record is for.

        op      the operation, w[0] if not set.

        w       the words of the record, w[2] is what gets
                remembered as seen.

        Updates src_deps, obj_deps and (when DPDEPS is wanted)
        file_deps, as well as seen and last_dir.
        """

        if not op:
            op = w[0]

        # we are never interested in .dirdep files as dependencies
        if path.endswith('.dirdep'):
            return
        for p in self.excludes:
            if p and path.startswith(p):
                if self.debug > 2:
                    print("exclude:", p, path, file=self.debug_out)
                return
        # we don't want to resolve the last component if it is
        # a symlink
        path = resolve(path, cwd, self.last_dir, self.debug, self.debug_out)
        if not path:
            return
        dir,base = os.path.split(path)
        if dir in self.seen:
            if self.debug > 2:
                print("seen:", dir, file=self.debug_out)
            return
        # we can have a path in an objdir which is a link
        # to the src dir, we may need to add dependencies for each
        rdir = dir
        dir = abspath(dir, cwd, self.last_dir, self.debug, self.debug_out)
        if rdir == dir or rdir.find('./') > 0:
            rdir = None
        # now put path back together
        path = '/'.join([dir,base])
        if self.debug > 1:
            print("raw=%s rdir=%s dir=%s path=%s" % (w[2], rdir, dir, path), file=self.debug_out)
        if op in 'RWS':
            if path in [self.last_dir, cwd, self.cwd, self.curdir]:
                if self.debug > 1:
                    print("skipping:", path, file=self.debug_out)
                return
            if os.path.isdir(path):
                # a directory is only a clue for later relative paths
                if op in 'RW':
                    self.last_dir = path
                if self.debug > 1:
                    print("ldir=", self.last_dir, file=self.debug_out)
                return

        if op in 'ERW':
            # finally, we get down to it
            if dir == self.cwd or dir == self.curdir:
                return
            srctop = self.find_top(path, self.srctops)
            if srctop:
                if self.dpdeps:
                    self.add(self.file_deps, path.replace(srctop,''), 'file')
                self.add(self.src_deps, dir.replace(srctop,''), 'src')
                self.seenit(w[2])
                self.seenit(dir)
                if rdir and not rdir.startswith(srctop):
                    dir = rdir      # for below
                    rdir = None
                else:
                    return

            objroot = None
            # NOTE: dir is deliberately the loop variable, what
            # follows uses the value the loop ended with.
            for dir in [dir,rdir]:
                if not dir:
                    continue
                objroot = self.find_top(dir, self.objroots)
                if objroot:
                    break
            if objroot:
                ddep = self.find_obj(objroot, dir, path, w[2])
                if ddep:
                    self.add(self.obj_deps, ddep, 'obj')
                    if self.dpdeps and objroot.endswith('/stage/'):
                        sp = '/'.join(path.replace(objroot,'').split('/')[1:])
                        self.add(self.file_deps, sp, 'file')
            else:
                # don't waste time looking again
                self.seenit(w[2])
                self.seenit(dir)


def main(argv, klass=MetaFile, xopts='', xoptf=None):
    """Simple driver for class MetaFile.

    Usage:
        script [options] [key=value ...] "meta" ...

    Options and key=value pairs contribute to the
    dictionary passed to MetaFile.

    -S "SRCTOP"
                add "SRCTOP" to the "SRCTOPS" list.

    -C "CURDIR"

    -O "OBJROOT"
                add "OBJROOT" to the "OBJROOTS" list.

    -R "RELDIR"

    -m "MACHINE"

    -a "MACHINE_ARCH"

    -H "HOST_TARGET"

    -T "TARGET_SPEC"

    -X "EXCLUDE"
                add "EXCLUDE" to the "EXCLUDES" list.

    -D "DPDEPS"

    -d  bumps debug level

    -q  do not output anything

    An argument of the form @file names a file which lists
    the meta files to process.

    xopts can name additional options, which are handed to
    xoptf(option, value, conf).

    Returns the last klass instance created, or None.
    """
    import getopt

    # import Psyco if we can
    # it can speed things up quite a bit
    have_psyco = 0
    try:
        import psyco
        psyco.full()
        have_psyco = 1
    except Exception:
        # strictly optional, carry on without it
        pass

    conf = {
        'SRCTOPS': [],
        'OBJROOTS': [],
        'EXCLUDES': [],
        }

    # NOTE: as in the original, the first variable missing
    # from the environment ends this, the rest are not looked at.
    try:
        machine = os.environ['MACHINE']
        if machine:
            conf['MACHINE'] = machine
        machine_arch = os.environ['MACHINE_ARCH']
        if machine_arch:
            conf['MACHINE_ARCH'] = machine_arch
        srctop = os.environ['SB_SRC']
        if srctop:
            conf['SRCTOPS'].append(srctop)
        objroot = os.environ['SB_OBJROOT']
        if objroot:
            conf['OBJROOTS'].append(objroot)
    except KeyError:
        pass

    debug = 0
    output = True

    opts, args = getopt.getopt(argv[1:], 'a:dS:C:O:R:m:D:H:qT:X:' + xopts)
    for o, a in opts:
        if o == '-a':
            conf['MACHINE_ARCH'] = a
        elif o == '-d':
            debug += 1
        elif o == '-q':
            output = False
        elif o == '-H':
            conf['HOST_TARGET'] = a
        elif o == '-S':
            if a not in conf['SRCTOPS']:
                conf['SRCTOPS'].append(a)
        elif o == '-C':
            conf['CURDIR'] = a
        elif o == '-O':
            if a not in conf['OBJROOTS']:
                conf['OBJROOTS'].append(a)
        elif o == '-R':
            conf['RELDIR'] = a
        elif o == '-D':
            conf['DPDEPS'] = a
        elif o == '-m':
            conf['MACHINE'] = a
        elif o == '-T':
            conf['TARGET_SPEC'] = a
        elif o == '-X':
            if a not in conf['EXCLUDES']:
                conf['EXCLUDES'].append(a)
        elif xoptf:
            xoptf(o, a, conf)

    conf['debug'] = debug

    # get any var=val assignments
    # only the leading run of them counts
    eaten = 0
    for a in args:
        if a.find('=') > 0:
            # the value may itself contain '='
            k,v = a.split('=', 1)
            if k in ['SRCTOP','OBJROOT','SRCTOPS','OBJROOTS']:
                if k == 'SRCTOP':
                    k = 'SRCTOPS'
                elif k == 'OBJROOT':
                    k = 'OBJROOTS'
                if v not in conf[k]:
                    conf[k].append(v)
            else:
                conf[k] = v
            eaten += 1
            continue
        break

    del args[:eaten]

    debug_out = getv(conf, 'debug_out', sys.stderr)

    if debug:
        print("config:", file=debug_out)
        print("psyco=", have_psyco, file=debug_out)
        for k,v in list(conf.items()):
            print("%s=%s" % (k,v), file=debug_out)

    m = None
    for a in args:
        if a.endswith('.meta'):
            if not os.path.exists(a):
                continue
            m = klass(a, conf)
        elif a.startswith('@'):
            # there can actually multiple files per line
            with open(a[1:]) as listf:
                for line in listf:
                    for f in line.strip().split():
                        if not os.path.exists(f):
                            continue
                        m = klass(f, conf)

    if output and m:
        print(m.dirdeps())

        print(m.src_dirdeps('\nsrc:'))

        dpdeps = getv(conf, 'DPDEPS')
        if dpdeps:
            # text mode, print() cannot write to a binary file
            with open(dpdeps, 'w') as dpf:
                m.file_depends(dpf)

    return m

if __name__ == '__main__':
    try:
        main(sys.argv)
    except:
        # yes, this goes to stdout
        print("ERROR: ", sys.exc_info()[1])
        raise