1#!/usr/bin/env python 2 3from __future__ import print_function 4 5""" 6This script parses each "meta" file and extracts the 7information needed to deduce build and src dependencies. 8 9It works much the same as the original shell script, but is 10*much* more efficient. 11 12The parsing work is handled by the class MetaFile. 13We only pay attention to a subset of the information in the 14"meta" files. Specifically: 15 16'CWD' to initialize our notion. 17 18'C' to track chdir(2) on a per process basis 19 20'R' files read are what we really care about. 21 directories read, provide a clue to resolving 22 subsequent relative paths. That is if we cannot find 23 them relative to 'cwd', we check relative to the last 24 dir read. 25 26'W' files opened for write or read-write, 27 for filemon V3 and earlier. 28 29'E' files executed. 30 31'L' files linked 32 33'V' the filemon version, this record is used as a clue 34 that we have reached the interesting bit. 35 36""" 37 38""" 39RCSid: 40 $Id: meta2deps.py,v 1.24 2017/02/08 22:17:10 sjg Exp $ 41 42 Copyright (c) 2011-2013, Juniper Networks, Inc. 43 All rights reserved. 44 45 Redistribution and use in source and binary forms, with or without 46 modification, are permitted provided that the following conditions 47 are met: 48 1. Redistributions of source code must retain the above copyright 49 notice, this list of conditions and the following disclaimer. 50 2. Redistributions in binary form must reproduce the above copyright 51 notice, this list of conditions and the following disclaimer in the 52 documentation and/or other materials provided with the distribution. 53 54 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 55 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 56 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 57 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 58 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 59 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 60 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 61 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 62 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 63 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 64 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 65 66""" 67 68import os, re, sys 69 70def getv(dict, key, d=None): 71 """Lookup key in dict and return value or the supplied default.""" 72 if key in dict: 73 return dict[key] 74 return d 75 76def resolve(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr): 77 """ 78 Return an absolute path, resolving via cwd or last_dir if needed. 79 """ 80 if path.endswith('/.'): 81 path = path[0:-2] 82 if len(path) > 0 and path[0] == '/': 83 return path 84 if path == '.': 85 return cwd 86 if path.startswith('./'): 87 return cwd + path[1:] 88 if last_dir == cwd: 89 last_dir = None 90 for d in [last_dir, cwd]: 91 if not d: 92 continue 93 p = '/'.join([d,path]) 94 if debug > 2: 95 print("looking for:", p, end=' ', file=debug_out) 96 if not os.path.exists(p): 97 if debug > 2: 98 print("nope", file=debug_out) 99 p = None 100 continue 101 if debug > 2: 102 print("found:", p, file=debug_out) 103 return p 104 return None 105 106def cleanpath(path): 107 """cleanup path without using realpath(3)""" 108 if path.startswith('/'): 109 r = '/' 110 else: 111 r = '' 112 p = [] 113 w = path.split('/') 114 for d in w: 115 if not d or d == '.': 116 continue 117 if d == '..': 118 p.pop() 119 continue 120 p.append(d) 121 122 return r + '/'.join(p) 123 124def abspath(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr): 125 """ 126 Return an absolute path, resolving via cwd or last_dir if needed. 127 this gets called a lot, so we try to avoid calling realpath. 128 """ 129 rpath = resolve(path, cwd, last_dir, debug, debug_out) 130 if rpath: 131 path = rpath 132 if (path.find('/') < 0 or 133 path.find('./') > 0 or 134 path.endswith('/..')): 135 path = cleanpath(path) 136 return path 137 138def sort_unique(list, cmp=None, key=None, reverse=False): 139 list.sort(cmp, key, reverse) 140 nl = [] 141 le = None 142 for e in list: 143 if e == le: 144 continue 145 le = e 146 nl.append(e) 147 return nl 148 149def add_trims(x): 150 return ['/' + x + '/', 151 '/' + x, 152 x + '/', 153 x] 154 155class MetaFile: 156 """class to parse meta files generated by bmake.""" 157 158 conf = None 159 dirdep_re = None 160 host_target = None 161 srctops = [] 162 objroots = [] 163 excludes = [] 164 seen = {} 165 obj_deps = [] 166 src_deps = [] 167 file_deps = [] 168 169 def __init__(self, name, conf={}): 170 """if name is set we will parse it now. 171 conf can have the follwing keys: 172 173 SRCTOPS list of tops of the src tree(s). 174 175 CURDIR the src directory 'bmake' was run from. 176 177 RELDIR the relative path from SRCTOP to CURDIR 178 179 MACHINE the machine we built for. 180 set to 'none' if we are not cross-building. 181 More specifically if machine cannot be deduced from objdirs. 182 183 TARGET_SPEC 184 Sometimes MACHINE isn't enough. 185 186 HOST_TARGET 187 when we build for the pseudo machine 'host' 188 the object tree uses HOST_TARGET rather than MACHINE. 189 190 OBJROOTS a list of the common prefix for all obj dirs it might 191 end in '/' or '-'. 192 193 DPDEPS names an optional file to which per file dependencies 194 will be appended. 195 For example if 'some/path/foo.h' is read from SRCTOP 196 then 'DPDEPS_some/path/foo.h +=' "RELDIR" is output. 197 This can allow 'bmake' to learn all the dirs within 198 the tree that depend on 'foo.h' 199 200 EXCLUDES 201 A list of paths to ignore. 202 ccache(1) can otherwise be trouble. 203 204 debug desired debug level 205 206 debug_out open file to send debug output to (sys.stderr) 207 208 """ 209 210 self.name = name 211 self.debug = getv(conf, 'debug', 0) 212 self.debug_out = getv(conf, 'debug_out', sys.stderr) 213 214 self.machine = getv(conf, 'MACHINE', '') 215 self.machine_arch = getv(conf, 'MACHINE_ARCH', '') 216 self.target_spec = getv(conf, 'TARGET_SPEC', '') 217 self.curdir = getv(conf, 'CURDIR') 218 self.reldir = getv(conf, 'RELDIR') 219 self.dpdeps = getv(conf, 'DPDEPS') 220 self.line = 0 221 222 if not self.conf: 223 # some of the steps below we want to do only once 224 self.conf = conf 225 self.host_target = getv(conf, 'HOST_TARGET') 226 for srctop in getv(conf, 'SRCTOPS', []): 227 if srctop[-1] != '/': 228 srctop += '/' 229 if not srctop in self.srctops: 230 self.srctops.append(srctop) 231 _srctop = os.path.realpath(srctop) 232 if _srctop[-1] != '/': 233 _srctop += '/' 234 if not _srctop in self.srctops: 235 self.srctops.append(_srctop) 236 237 trim_list = add_trims(self.machine) 238 if self.machine == 'host': 239 trim_list += add_trims(self.host_target) 240 if self.target_spec: 241 trim_list += add_trims(self.target_spec) 242 243 for objroot in getv(conf, 'OBJROOTS', []): 244 for e in trim_list: 245 if objroot.endswith(e): 246 # this is not what we want - fix it 247 objroot = objroot[0:-len(e)] 248 249 if objroot[-1] != '/': 250 objroot += '/' 251 if not objroot in self.objroots: 252 self.objroots.append(objroot) 253 _objroot = os.path.realpath(objroot) 254 if objroot[-1] == '/': 255 _objroot += '/' 256 if not _objroot in self.objroots: 257 self.objroots.append(_objroot) 258 259 # we want the longest match 260 self.srctops.sort(reverse=True) 261 self.objroots.sort(reverse=True) 262 263 self.excludes = getv(conf, 'EXCLUDES', []) 264 265 if self.debug: 266 print("host_target=", self.host_target, file=self.debug_out) 267 print("srctops=", self.srctops, file=self.debug_out) 268 print("objroots=", self.objroots, file=self.debug_out) 269 print("excludes=", self.excludes, file=self.debug_out) 270 271 self.dirdep_re = re.compile(r'([^/]+)/(.+)') 272 273 if self.dpdeps and not self.reldir: 274 if self.debug: 275 print("need reldir:", end=' ', file=self.debug_out) 276 if self.curdir: 277 srctop = self.find_top(self.curdir, self.srctops) 278 if srctop: 279 self.reldir = self.curdir.replace(srctop,'') 280 if self.debug: 281 print(self.reldir, file=self.debug_out) 282 if not self.reldir: 283 self.dpdeps = None # we cannot do it? 284 285 self.cwd = os.getcwd() # make sure this is initialized 286 self.last_dir = self.cwd 287 288 if name: 289 self.try_parse() 290 291 def reset(self): 292 """reset state if we are being passed meta files from multiple directories.""" 293 self.seen = {} 294 self.obj_deps = [] 295 self.src_deps = [] 296 self.file_deps = [] 297 298 def dirdeps(self, sep='\n'): 299 """return DIRDEPS""" 300 return sep.strip() + sep.join(self.obj_deps) 301 302 def src_dirdeps(self, sep='\n'): 303 """return SRC_DIRDEPS""" 304 return sep.strip() + sep.join(self.src_deps) 305 306 def file_depends(self, out=None): 307 """Append DPDEPS_${file} += ${RELDIR} 308 for each file we saw, to the output file.""" 309 if not self.reldir: 310 return None 311 for f in sort_unique(self.file_deps): 312 print('DPDEPS_%s += %s' % (f, self.reldir), file=out) 313 # these entries provide for reverse DIRDEPS lookup 314 for f in self.obj_deps: 315 print('DEPDIRS_%s += %s' % (f, self.reldir), file=out) 316 317 def seenit(self, dir): 318 """rememer that we have seen dir.""" 319 self.seen[dir] = 1 320 321 def add(self, list, data, clue=''): 322 """add data to list if it isn't already there.""" 323 if data not in list: 324 list.append(data) 325 if self.debug: 326 print("%s: %sAdd: %s" % (self.name, clue, data), file=self.debug_out) 327 328 def find_top(self, path, list): 329 """the logical tree may be split across multiple trees""" 330 for top in list: 331 if path.startswith(top): 332 if self.debug > 2: 333 print("found in", top, file=self.debug_out) 334 return top 335 return None 336 337 def find_obj(self, objroot, dir, path, input): 338 """return path within objroot, taking care of .dirdep files""" 339 ddep = None 340 for ddepf in [path + '.dirdep', dir + '/.dirdep']: 341 if not ddep and os.path.exists(ddepf): 342 ddep = open(ddepf, 'r').readline().strip('# \n') 343 if self.debug > 1: 344 print("found %s: %s\n" % (ddepf, ddep), file=self.debug_out) 345 if ddep.endswith(self.machine): 346 ddep = ddep[0:-(1+len(self.machine))] 347 elif self.target_spec and ddep.endswith(self.target_spec): 348 ddep = ddep[0:-(1+len(self.target_spec))] 349 350 if not ddep: 351 # no .dirdeps, so remember that we've seen the raw input 352 self.seenit(input) 353 self.seenit(dir) 354 if self.machine == 'none': 355 if dir.startswith(objroot): 356 return dir.replace(objroot,'') 357 return None 358 m = self.dirdep_re.match(dir.replace(objroot,'')) 359 if m: 360 ddep = m.group(2) 361 dmachine = m.group(1) 362 if dmachine != self.machine: 363 if not (self.machine == 'host' and 364 dmachine == self.host_target): 365 if self.debug > 2: 366 print("adding .%s to %s" % (dmachine, ddep), file=self.debug_out) 367 ddep += '.' + dmachine 368 369 return ddep 370 371 def try_parse(self, name=None, file=None): 372 """give file and line number causing exception""" 373 try: 374 self.parse(name, file) 375 except: 376 # give a useful clue 377 print('{}:{}: '.format(self.name, self.line), end=' ', file=sys.stderr) 378 raise 379 380 def parse(self, name=None, file=None): 381 """A meta file looks like: 382 383 # Meta data file "path" 384 CMD "command-line" 385 CWD "cwd" 386 TARGET "target" 387 -- command output -- 388 -- filemon acquired metadata -- 389 # buildmon version 3 390 V 3 391 C "pid" "cwd" 392 E "pid" "path" 393 F "pid" "child" 394 R "pid" "path" 395 W "pid" "path" 396 X "pid" "status" 397 D "pid" "path" 398 L "pid" "src" "target" 399 M "pid" "old" "new" 400 S "pid" "path" 401 # Bye bye 402 403 We go to some effort to avoid processing a dependency more than once. 404 Of the above record types only C,E,F,L,R,V and W are of interest. 405 """ 406 407 version = 0 # unknown 408 if name: 409 self.name = name; 410 if file: 411 f = file 412 cwd = self.last_dir = self.cwd 413 else: 414 f = open(self.name, 'r') 415 skip = True 416 pid_cwd = {} 417 pid_last_dir = {} 418 last_pid = 0 419 420 self.line = 0 421 if self.curdir: 422 self.seenit(self.curdir) # we ignore this 423 424 interesting = 'CEFLRV' 425 for line in f: 426 self.line += 1 427 # ignore anything we don't care about 428 if not line[0] in interesting: 429 continue 430 if self.debug > 2: 431 print("input:", line, end=' ', file=self.debug_out) 432 w = line.split() 433 434 if skip: 435 if w[0] == 'V': 436 skip = False 437 version = int(w[1]) 438 """ 439 if version < 4: 440 # we cannot ignore 'W' records 441 # as they may be 'rw' 442 interesting += 'W' 443 """ 444 elif w[0] == 'CWD': 445 self.cwd = cwd = self.last_dir = w[1] 446 self.seenit(cwd) # ignore this 447 if self.debug: 448 print("%s: CWD=%s" % (self.name, cwd), file=self.debug_out) 449 continue 450 451 pid = int(w[1]) 452 if pid != last_pid: 453 if last_pid: 454 pid_last_dir[last_pid] = self.last_dir 455 cwd = getv(pid_cwd, pid, self.cwd) 456 self.last_dir = getv(pid_last_dir, pid, self.cwd) 457 last_pid = pid 458 459 # process operations 460 if w[0] == 'F': 461 npid = int(w[2]) 462 pid_cwd[npid] = cwd 463 pid_last_dir[npid] = cwd 464 last_pid = npid 465 continue 466 elif w[0] == 'C': 467 cwd = abspath(w[2], cwd, None, self.debug, self.debug_out) 468 if cwd.endswith('/.'): 469 cwd = cwd[0:-2] 470 self.last_dir = pid_last_dir[pid] = cwd 471 pid_cwd[pid] = cwd 472 if self.debug > 1: 473 print("cwd=", cwd, file=self.debug_out) 474 continue 475 476 if w[2] in self.seen: 477 if self.debug > 2: 478 print("seen:", w[2], file=self.debug_out) 479 continue 480 # file operations 481 if w[0] in 'ML': 482 # these are special, tread src as read and 483 # target as write 484 self.parse_path(w[1].strip("'"), cwd, 'R', w) 485 self.parse_path(w[2].strip("'"), cwd, 'W', w) 486 continue 487 elif w[0] in 'ERWS': 488 path = w[2] 489 self.parse_path(path, cwd, w[0], w) 490 491 if not file: 492 f.close() 493 494 def parse_path(self, path, cwd, op=None, w=[]): 495 """look at a path for the op specified""" 496 497 if not op: 498 op = w[0] 499 500 # we are never interested in .dirdep files as dependencies 501 if path.endswith('.dirdep'): 502 return 503 for p in self.excludes: 504 if p and path.startswith(p): 505 if self.debug > 2: 506 print("exclude:", p, path, file=self.debug_out) 507 return 508 # we don't want to resolve the last component if it is 509 # a symlink 510 path = resolve(path, cwd, self.last_dir, self.debug, self.debug_out) 511 if not path: 512 return 513 dir,base = os.path.split(path) 514 if dir in self.seen: 515 if self.debug > 2: 516 print("seen:", dir, file=self.debug_out) 517 return 518 # we can have a path in an objdir which is a link 519 # to the src dir, we may need to add dependencies for each 520 rdir = dir 521 dir = abspath(dir, cwd, self.last_dir, self.debug, self.debug_out) 522 if rdir == dir or rdir.find('./') > 0: 523 rdir = None 524 if os.path.islink(dir): 525 rdir = os.path.realpath(dir) 526 # now put path back together 527 path = '/'.join([dir,base]) 528 if self.debug > 1: 529 print("raw=%s rdir=%s dir=%s path=%s" % (w[2], rdir, dir, path), file=self.debug_out) 530 if op in 'RWS': 531 if path in [self.last_dir, cwd, self.cwd, self.curdir]: 532 if self.debug > 1: 533 print("skipping:", path, file=self.debug_out) 534 return 535 if os.path.isdir(path): 536 if op in 'RW': 537 self.last_dir = path; 538 if self.debug > 1: 539 print("ldir=", self.last_dir, file=self.debug_out) 540 return 541 542 if op in 'ERW': 543 # finally, we get down to it 544 if dir == self.cwd or dir == self.curdir: 545 return 546 srctop = self.find_top(path, self.srctops) 547 if srctop: 548 if self.dpdeps: 549 self.add(self.file_deps, path.replace(srctop,''), 'file') 550 self.add(self.src_deps, dir.replace(srctop,''), 'src') 551 self.seenit(w[2]) 552 self.seenit(dir) 553 if rdir and not rdir.startswith(srctop): 554 dir = rdir # for below 555 rdir = None 556 else: 557 return 558 559 objroot = None 560 for dir in [dir,rdir]: 561 if not dir: 562 continue 563 objroot = self.find_top(dir, self.objroots) 564 if objroot: 565 break 566 if objroot: 567 ddep = self.find_obj(objroot, dir, path, w[2]) 568 if ddep: 569 self.add(self.obj_deps, ddep, 'obj') 570 if self.dpdeps and objroot.endswith('/stage/'): 571 sp = '/'.join(path.replace(objroot,'').split('/')[1:]) 572 self.add(self.file_deps, sp, 'file') 573 else: 574 # don't waste time looking again 575 self.seenit(w[2]) 576 self.seenit(dir) 577 578 579def main(argv, klass=MetaFile, xopts='', xoptf=None): 580 """Simple driver for class MetaFile. 581 582 Usage: 583 script [options] [key=value ...] "meta" ... 584 585 Options and key=value pairs contribute to the 586 dictionary passed to MetaFile. 587 588 -S "SRCTOP" 589 add "SRCTOP" to the "SRCTOPS" list. 590 591 -C "CURDIR" 592 593 -O "OBJROOT" 594 add "OBJROOT" to the "OBJROOTS" list. 595 596 -m "MACHINE" 597 598 -a "MACHINE_ARCH" 599 600 -H "HOST_TARGET" 601 602 -D "DPDEPS" 603 604 -d bumps debug level 605 606 """ 607 import getopt 608 609 # import Psyco if we can 610 # it can speed things up quite a bit 611 have_psyco = 0 612 try: 613 import psyco 614 psyco.full() 615 have_psyco = 1 616 except: 617 pass 618 619 conf = { 620 'SRCTOPS': [], 621 'OBJROOTS': [], 622 'EXCLUDES': [], 623 } 624 625 try: 626 machine = os.environ['MACHINE'] 627 if machine: 628 conf['MACHINE'] = machine 629 machine_arch = os.environ['MACHINE_ARCH'] 630 if machine_arch: 631 conf['MACHINE_ARCH'] = machine_arch 632 srctop = os.environ['SB_SRC'] 633 if srctop: 634 conf['SRCTOPS'].append(srctop) 635 objroot = os.environ['SB_OBJROOT'] 636 if objroot: 637 conf['OBJROOTS'].append(objroot) 638 except: 639 pass 640 641 debug = 0 642 output = True 643 644 opts, args = getopt.getopt(argv[1:], 'a:dS:C:O:R:m:D:H:qT:X:' + xopts) 645 for o, a in opts: 646 if o == '-a': 647 conf['MACHINE_ARCH'] = a 648 elif o == '-d': 649 debug += 1 650 elif o == '-q': 651 output = False 652 elif o == '-H': 653 conf['HOST_TARGET'] = a 654 elif o == '-S': 655 if a not in conf['SRCTOPS']: 656 conf['SRCTOPS'].append(a) 657 elif o == '-C': 658 conf['CURDIR'] = a 659 elif o == '-O': 660 if a not in conf['OBJROOTS']: 661 conf['OBJROOTS'].append(a) 662 elif o == '-R': 663 conf['RELDIR'] = a 664 elif o == '-D': 665 conf['DPDEPS'] = a 666 elif o == '-m': 667 conf['MACHINE'] = a 668 elif o == '-T': 669 conf['TARGET_SPEC'] = a 670 elif o == '-X': 671 if a not in conf['EXCLUDES']: 672 conf['EXCLUDES'].append(a) 673 elif xoptf: 674 xoptf(o, a, conf) 675 676 conf['debug'] = debug 677 678 # get any var=val assignments 679 eaten = [] 680 for a in args: 681 if a.find('=') > 0: 682 k,v = a.split('=') 683 if k in ['SRCTOP','OBJROOT','SRCTOPS','OBJROOTS']: 684 if k == 'SRCTOP': 685 k = 'SRCTOPS' 686 elif k == 'OBJROOT': 687 k = 'OBJROOTS' 688 if v not in conf[k]: 689 conf[k].append(v) 690 else: 691 conf[k] = v 692 eaten.append(a) 693 continue 694 break 695 696 for a in eaten: 697 args.remove(a) 698 699 debug_out = getv(conf, 'debug_out', sys.stderr) 700 701 if debug: 702 print("config:", file=debug_out) 703 print("psyco=", have_psyco, file=debug_out) 704 for k,v in list(conf.items()): 705 print("%s=%s" % (k,v), file=debug_out) 706 707 m = None 708 for a in args: 709 if a.endswith('.meta'): 710 if not os.path.exists(a): 711 continue 712 m = klass(a, conf) 713 elif a.startswith('@'): 714 # there can actually multiple files per line 715 for line in open(a[1:]): 716 for f in line.strip().split(): 717 if not os.path.exists(f): 718 continue 719 m = klass(f, conf) 720 721 if output and m: 722 print(m.dirdeps()) 723 724 print(m.src_dirdeps('\nsrc:')) 725 726 dpdeps = getv(conf, 'DPDEPS') 727 if dpdeps: 728 m.file_depends(open(dpdeps, 'wb')) 729 730 return m 731 732if __name__ == '__main__': 733 try: 734 main(sys.argv) 735 except: 736 # yes, this goes to stdout 737 print("ERROR: ", sys.exc_info()[1]) 738 raise 739 740