1#!/usr/bin/env python 2 3from __future__ import print_function 4 5""" 6This script parses each "meta" file and extracts the 7information needed to deduce build and src dependencies. 8 9It works much the same as the original shell script, but is 10*much* more efficient. 11 12The parsing work is handled by the class MetaFile. 13We only pay attention to a subset of the information in the 14"meta" files. Specifically: 15 16'CWD' to initialize our notion. 17 18'C' to track chdir(2) on a per process basis 19 20'R' files read are what we really care about. 21 directories read, provide a clue to resolving 22 subsequent relative paths. That is if we cannot find 23 them relative to 'cwd', we check relative to the last 24 dir read. 25 26'W' files opened for write or read-write, 27 for filemon V3 and earlier. 28 29'E' files executed. 30 31'L' files linked 32 33'V' the filemon version, this record is used as a clue 34 that we have reached the interesting bit. 35 36""" 37 38""" 39RCSid: 40 $Id: meta2deps.py,v 1.19 2016/04/02 20:45:40 sjg Exp $ 41 42 Copyright (c) 2011-2013, Juniper Networks, Inc. 43 All rights reserved. 44 45 Redistribution and use in source and binary forms, with or without 46 modification, are permitted provided that the following conditions 47 are met: 48 1. Redistributions of source code must retain the above copyright 49 notice, this list of conditions and the following disclaimer. 50 2. Redistributions in binary form must reproduce the above copyright 51 notice, this list of conditions and the following disclaimer in the 52 documentation and/or other materials provided with the distribution. 53 54 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 55 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 56 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 57 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 58 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 59 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 60 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 61 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 62 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 63 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 64 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 65 66""" 67 68import os, re, sys 69 70def getv(dict, key, d=None): 71 """Lookup key in dict and return value or the supplied default.""" 72 if key in dict: 73 return dict[key] 74 return d 75 76def resolve(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr): 77 """ 78 Return an absolute path, resolving via cwd or last_dir if needed. 79 """ 80 if path.endswith('/.'): 81 path = path[0:-2] 82 if len(path) > 0 and path[0] == '/': 83 return path 84 if path == '.': 85 return cwd 86 if path.startswith('./'): 87 return cwd + path[1:] 88 if last_dir == cwd: 89 last_dir = None 90 for d in [last_dir, cwd]: 91 if not d: 92 continue 93 p = '/'.join([d,path]) 94 if debug > 2: 95 print("looking for:", p, end=' ', file=debug_out) 96 if not os.path.exists(p): 97 if debug > 2: 98 print("nope", file=debug_out) 99 p = None 100 continue 101 if debug > 2: 102 print("found:", p, file=debug_out) 103 return p 104 return None 105 106def abspath(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr): 107 """ 108 Return an absolute path, resolving via cwd or last_dir if needed. 109 this gets called a lot, so we try to avoid calling realpath 110 until we know we have something. 111 """ 112 rpath = resolve(path, cwd, last_dir, debug, debug_out) 113 if rpath: 114 path = rpath 115 if (path.find('/') < 0 or 116 path.find('./') > 0 or 117 path.endswith('/..') or 118 os.path.islink(path)): 119 return os.path.realpath(path) 120 return path 121 122def sort_unique(list, cmp=None, key=None, reverse=False): 123 list.sort(cmp, key, reverse) 124 nl = [] 125 le = None 126 for e in list: 127 if e == le: 128 continue 129 nl.append(e) 130 return nl 131 132def add_trims(x): 133 return ['/' + x + '/', 134 '/' + x, 135 x + '/', 136 x] 137 138class MetaFile: 139 """class to parse meta files generated by bmake.""" 140 141 conf = None 142 dirdep_re = None 143 host_target = None 144 srctops = [] 145 objroots = [] 146 excludes = [] 147 seen = {} 148 obj_deps = [] 149 src_deps = [] 150 file_deps = [] 151 152 def __init__(self, name, conf={}): 153 """if name is set we will parse it now. 154 conf can have the follwing keys: 155 156 SRCTOPS list of tops of the src tree(s). 157 158 CURDIR the src directory 'bmake' was run from. 159 160 RELDIR the relative path from SRCTOP to CURDIR 161 162 MACHINE the machine we built for. 163 set to 'none' if we are not cross-building. 164 More specifically if machine cannot be deduced from objdirs. 165 166 TARGET_SPEC 167 Sometimes MACHINE isn't enough. 168 169 HOST_TARGET 170 when we build for the pseudo machine 'host' 171 the object tree uses HOST_TARGET rather than MACHINE. 172 173 OBJROOTS a list of the common prefix for all obj dirs it might 174 end in '/' or '-'. 175 176 DPDEPS names an optional file to which per file dependencies 177 will be appended. 178 For example if 'some/path/foo.h' is read from SRCTOP 179 then 'DPDEPS_some/path/foo.h +=' "RELDIR" is output. 180 This can allow 'bmake' to learn all the dirs within 181 the tree that depend on 'foo.h' 182 183 EXCLUDES 184 A list of paths to ignore. 185 ccache(1) can otherwise be trouble. 186 187 debug desired debug level 188 189 debug_out open file to send debug output to (sys.stderr) 190 191 """ 192 193 self.name = name 194 self.debug = getv(conf, 'debug', 0) 195 self.debug_out = getv(conf, 'debug_out', sys.stderr) 196 197 self.machine = getv(conf, 'MACHINE', '') 198 self.machine_arch = getv(conf, 'MACHINE_ARCH', '') 199 self.target_spec = getv(conf, 'TARGET_SPEC', '') 200 self.curdir = getv(conf, 'CURDIR') 201 self.reldir = getv(conf, 'RELDIR') 202 self.dpdeps = getv(conf, 'DPDEPS') 203 self.line = 0 204 205 if not self.conf: 206 # some of the steps below we want to do only once 207 self.conf = conf 208 self.host_target = getv(conf, 'HOST_TARGET') 209 for srctop in getv(conf, 'SRCTOPS', []): 210 if srctop[-1] != '/': 211 srctop += '/' 212 if not srctop in self.srctops: 213 self.srctops.append(srctop) 214 _srctop = os.path.realpath(srctop) 215 if _srctop[-1] != '/': 216 _srctop += '/' 217 if not _srctop in self.srctops: 218 self.srctops.append(_srctop) 219 220 trim_list = add_trims(self.machine) 221 if self.machine == 'host': 222 trim_list += add_trims(self.host_target) 223 if self.target_spec: 224 trim_list += add_trims(self.target_spec) 225 226 for objroot in getv(conf, 'OBJROOTS', []): 227 for e in trim_list: 228 if objroot.endswith(e): 229 # this is not what we want - fix it 230 objroot = objroot[0:-len(e)] 231 if e.endswith('/'): 232 objroot += '/' 233 if not objroot in self.objroots: 234 self.objroots.append(objroot) 235 _objroot = os.path.realpath(objroot) 236 if objroot[-1] == '/': 237 _objroot += '/' 238 if not _objroot in self.objroots: 239 self.objroots.append(_objroot) 240 241 # we want the longest match 242 self.srctops.sort(reverse=True) 243 self.objroots.sort(reverse=True) 244 245 self.excludes = getv(conf, 'EXCLUDES', []) 246 247 if self.debug: 248 print("host_target=", self.host_target, file=self.debug_out) 249 print("srctops=", self.srctops, file=self.debug_out) 250 print("objroots=", self.objroots, file=self.debug_out) 251 print("excludes=", self.excludes, file=self.debug_out) 252 253 self.dirdep_re = re.compile(r'([^/]+)/(.+)') 254 255 if self.dpdeps and not self.reldir: 256 if self.debug: 257 print("need reldir:", end=' ', file=self.debug_out) 258 if self.curdir: 259 srctop = self.find_top(self.curdir, self.srctops) 260 if srctop: 261 self.reldir = self.curdir.replace(srctop,'') 262 if self.debug: 263 print(self.reldir, file=self.debug_out) 264 if not self.reldir: 265 self.dpdeps = None # we cannot do it? 266 267 self.cwd = os.getcwd() # make sure this is initialized 268 self.last_dir = self.cwd 269 270 if name: 271 self.try_parse() 272 273 def reset(self): 274 """reset state if we are being passed meta files from multiple directories.""" 275 self.seen = {} 276 self.obj_deps = [] 277 self.src_deps = [] 278 self.file_deps = [] 279 280 def dirdeps(self, sep='\n'): 281 """return DIRDEPS""" 282 return sep.strip() + sep.join(self.obj_deps) 283 284 def src_dirdeps(self, sep='\n'): 285 """return SRC_DIRDEPS""" 286 return sep.strip() + sep.join(self.src_deps) 287 288 def file_depends(self, out=None): 289 """Append DPDEPS_${file} += ${RELDIR} 290 for each file we saw, to the output file.""" 291 if not self.reldir: 292 return None 293 for f in sort_unique(self.file_deps): 294 print('DPDEPS_%s += %s' % (f, self.reldir), file=out) 295 296 def seenit(self, dir): 297 """rememer that we have seen dir.""" 298 self.seen[dir] = 1 299 300 def add(self, list, data, clue=''): 301 """add data to list if it isn't already there.""" 302 if data not in list: 303 list.append(data) 304 if self.debug: 305 print("%s: %sAdd: %s" % (self.name, clue, data), file=self.debug_out) 306 307 def find_top(self, path, list): 308 """the logical tree may be split across multiple trees""" 309 for top in list: 310 if path.startswith(top): 311 if self.debug > 2: 312 print("found in", top, file=self.debug_out) 313 return top 314 return None 315 316 def find_obj(self, objroot, dir, path, input): 317 """return path within objroot, taking care of .dirdep files""" 318 ddep = None 319 for ddepf in [path + '.dirdep', dir + '/.dirdep']: 320 if not ddep and os.path.exists(ddepf): 321 ddep = open(ddepf, 'r').readline().strip('# \n') 322 if self.debug > 1: 323 print("found %s: %s\n" % (ddepf, ddep), file=self.debug_out) 324 if ddep.endswith(self.machine): 325 ddep = ddep[0:-(1+len(self.machine))] 326 elif self.target_spec and ddep.endswith(self.target_spec): 327 ddep = ddep[0:-(1+len(self.target_spec))] 328 329 if not ddep: 330 # no .dirdeps, so remember that we've seen the raw input 331 self.seenit(input) 332 self.seenit(dir) 333 if self.machine == 'none': 334 if dir.startswith(objroot): 335 return dir.replace(objroot,'') 336 return None 337 m = self.dirdep_re.match(dir.replace(objroot,'')) 338 if m: 339 ddep = m.group(2) 340 dmachine = m.group(1) 341 if dmachine != self.machine: 342 if not (self.machine == 'host' and 343 dmachine == self.host_target): 344 if self.debug > 2: 345 print("adding .%s to %s" % (dmachine, ddep), file=self.debug_out) 346 ddep += '.' + dmachine 347 348 return ddep 349 350 def try_parse(self, name=None, file=None): 351 """give file and line number causing exception""" 352 try: 353 self.parse(name, file) 354 except: 355 # give a useful clue 356 print('{}:{}: '.format(self.name, self.line), end=' ', file=sys.stderr) 357 raise 358 359 def parse(self, name=None, file=None): 360 """A meta file looks like: 361 362 # Meta data file "path" 363 CMD "command-line" 364 CWD "cwd" 365 TARGET "target" 366 -- command output -- 367 -- filemon acquired metadata -- 368 # buildmon version 3 369 V 3 370 C "pid" "cwd" 371 E "pid" "path" 372 F "pid" "child" 373 R "pid" "path" 374 W "pid" "path" 375 X "pid" "status" 376 D "pid" "path" 377 L "pid" "src" "target" 378 M "pid" "old" "new" 379 S "pid" "path" 380 # Bye bye 381 382 We go to some effort to avoid processing a dependency more than once. 383 Of the above record types only C,E,F,L,R,V and W are of interest. 384 """ 385 386 version = 0 # unknown 387 if name: 388 self.name = name; 389 if file: 390 f = file 391 cwd = self.last_dir = self.cwd 392 else: 393 f = open(self.name, 'r') 394 skip = True 395 pid_cwd = {} 396 pid_last_dir = {} 397 last_pid = 0 398 399 self.line = 0 400 if self.curdir: 401 self.seenit(self.curdir) # we ignore this 402 403 interesting = 'CEFLRV' 404 for line in f: 405 self.line += 1 406 # ignore anything we don't care about 407 if not line[0] in interesting: 408 continue 409 if self.debug > 2: 410 print("input:", line, end=' ', file=self.debug_out) 411 w = line.split() 412 413 if skip: 414 if w[0] == 'V': 415 skip = False 416 version = int(w[1]) 417 """ 418 if version < 4: 419 # we cannot ignore 'W' records 420 # as they may be 'rw' 421 interesting += 'W' 422 """ 423 elif w[0] == 'CWD': 424 self.cwd = cwd = self.last_dir = w[1] 425 self.seenit(cwd) # ignore this 426 if self.debug: 427 print("%s: CWD=%s" % (self.name, cwd), file=self.debug_out) 428 continue 429 430 pid = int(w[1]) 431 if pid != last_pid: 432 if last_pid: 433 pid_cwd[last_pid] = cwd 434 pid_last_dir[last_pid] = self.last_dir 435 cwd = getv(pid_cwd, pid, self.cwd) 436 self.last_dir = getv(pid_last_dir, pid, self.cwd) 437 last_pid = pid 438 439 # process operations 440 if w[0] == 'F': 441 npid = int(w[2]) 442 pid_cwd[npid] = cwd 443 pid_last_dir[npid] = cwd 444 last_pid = npid 445 continue 446 elif w[0] == 'C': 447 cwd = abspath(w[2], cwd, None, self.debug, self.debug_out) 448 if cwd.endswith('/.'): 449 cwd = cwd[0:-2] 450 self.last_dir = cwd 451 if self.debug > 1: 452 print("cwd=", cwd, file=self.debug_out) 453 continue 454 455 if w[2] in self.seen: 456 if self.debug > 2: 457 print("seen:", w[2], file=self.debug_out) 458 continue 459 # file operations 460 if w[0] in 'ML': 461 # these are special, tread src as read and 462 # target as write 463 self.parse_path(w[1].strip("'"), cwd, 'R', w) 464 self.parse_path(w[2].strip("'"), cwd, 'W', w) 465 continue 466 elif w[0] in 'ERWS': 467 path = w[2] 468 self.parse_path(path, cwd, w[0], w) 469 470 if not file: 471 f.close() 472 473 def parse_path(self, path, cwd, op=None, w=[]): 474 """look at a path for the op specified""" 475 476 if not op: 477 op = w[0] 478 479 # we are never interested in .dirdep files as dependencies 480 if path.endswith('.dirdep'): 481 return 482 for p in self.excludes: 483 if p and path.startswith(p): 484 if self.debug > 2: 485 print("exclude:", p, path, file=self.debug_out) 486 return 487 # we don't want to resolve the last component if it is 488 # a symlink 489 path = resolve(path, cwd, self.last_dir, self.debug, self.debug_out) 490 if not path: 491 return 492 dir,base = os.path.split(path) 493 if dir in self.seen: 494 if self.debug > 2: 495 print("seen:", dir, file=self.debug_out) 496 return 497 # we can have a path in an objdir which is a link 498 # to the src dir, we may need to add dependencies for each 499 rdir = dir 500 dir = abspath(dir, cwd, self.last_dir, self.debug, self.debug_out) 501 if rdir == dir or rdir.find('./') > 0: 502 rdir = None 503 # now put path back together 504 path = '/'.join([dir,base]) 505 if self.debug > 1: 506 print("raw=%s rdir=%s dir=%s path=%s" % (w[2], rdir, dir, path), file=self.debug_out) 507 if op in 'RWS': 508 if path in [self.last_dir, cwd, self.cwd, self.curdir]: 509 if self.debug > 1: 510 print("skipping:", path, file=self.debug_out) 511 return 512 if os.path.isdir(path): 513 if op in 'RW': 514 self.last_dir = path; 515 if self.debug > 1: 516 print("ldir=", self.last_dir, file=self.debug_out) 517 return 518 519 if op in 'ERW': 520 # finally, we get down to it 521 if dir == self.cwd or dir == self.curdir: 522 return 523 srctop = self.find_top(path, self.srctops) 524 if srctop: 525 if self.dpdeps: 526 self.add(self.file_deps, path.replace(srctop,''), 'file') 527 self.add(self.src_deps, dir.replace(srctop,''), 'src') 528 self.seenit(w[2]) 529 self.seenit(dir) 530 if rdir and not rdir.startswith(srctop): 531 dir = rdir # for below 532 rdir = None 533 else: 534 return 535 536 objroot = None 537 for dir in [dir,rdir]: 538 if not dir: 539 continue 540 objroot = self.find_top(dir, self.objroots) 541 if objroot: 542 break 543 if objroot: 544 ddep = self.find_obj(objroot, dir, path, w[2]) 545 if ddep: 546 self.add(self.obj_deps, ddep, 'obj') 547 else: 548 # don't waste time looking again 549 self.seenit(w[2]) 550 self.seenit(dir) 551 552 553def main(argv, klass=MetaFile, xopts='', xoptf=None): 554 """Simple driver for class MetaFile. 555 556 Usage: 557 script [options] [key=value ...] "meta" ... 558 559 Options and key=value pairs contribute to the 560 dictionary passed to MetaFile. 561 562 -S "SRCTOP" 563 add "SRCTOP" to the "SRCTOPS" list. 564 565 -C "CURDIR" 566 567 -O "OBJROOT" 568 add "OBJROOT" to the "OBJROOTS" list. 569 570 -m "MACHINE" 571 572 -a "MACHINE_ARCH" 573 574 -H "HOST_TARGET" 575 576 -D "DPDEPS" 577 578 -d bumps debug level 579 580 """ 581 import getopt 582 583 # import Psyco if we can 584 # it can speed things up quite a bit 585 have_psyco = 0 586 try: 587 import psyco 588 psyco.full() 589 have_psyco = 1 590 except: 591 pass 592 593 conf = { 594 'SRCTOPS': [], 595 'OBJROOTS': [], 596 'EXCLUDES': [], 597 } 598 599 try: 600 machine = os.environ['MACHINE'] 601 if machine: 602 conf['MACHINE'] = machine 603 machine_arch = os.environ['MACHINE_ARCH'] 604 if machine_arch: 605 conf['MACHINE_ARCH'] = machine_arch 606 srctop = os.environ['SB_SRC'] 607 if srctop: 608 conf['SRCTOPS'].append(srctop) 609 objroot = os.environ['SB_OBJROOT'] 610 if objroot: 611 conf['OBJROOTS'].append(objroot) 612 except: 613 pass 614 615 debug = 0 616 output = True 617 618 opts, args = getopt.getopt(argv[1:], 'a:dS:C:O:R:m:D:H:qT:X:' + xopts) 619 for o, a in opts: 620 if o == '-a': 621 conf['MACHINE_ARCH'] = a 622 elif o == '-d': 623 debug += 1 624 elif o == '-q': 625 output = False 626 elif o == '-H': 627 conf['HOST_TARGET'] = a 628 elif o == '-S': 629 if a not in conf['SRCTOPS']: 630 conf['SRCTOPS'].append(a) 631 elif o == '-C': 632 conf['CURDIR'] = a 633 elif o == '-O': 634 if a not in conf['OBJROOTS']: 635 conf['OBJROOTS'].append(a) 636 elif o == '-R': 637 conf['RELDIR'] = a 638 elif o == '-D': 639 conf['DPDEPS'] = a 640 elif o == '-m': 641 conf['MACHINE'] = a 642 elif o == '-T': 643 conf['TARGET_SPEC'] = a 644 elif o == '-X': 645 if a not in conf['EXCLUDES']: 646 conf['EXCLUDES'].append(a) 647 elif xoptf: 648 xoptf(o, a, conf) 649 650 conf['debug'] = debug 651 652 # get any var=val assignments 653 eaten = [] 654 for a in args: 655 if a.find('=') > 0: 656 k,v = a.split('=') 657 if k in ['SRCTOP','OBJROOT','SRCTOPS','OBJROOTS']: 658 if k == 'SRCTOP': 659 k = 'SRCTOPS' 660 elif k == 'OBJROOT': 661 k = 'OBJROOTS' 662 if v not in conf[k]: 663 conf[k].append(v) 664 else: 665 conf[k] = v 666 eaten.append(a) 667 continue 668 break 669 670 for a in eaten: 671 args.remove(a) 672 673 debug_out = getv(conf, 'debug_out', sys.stderr) 674 675 if debug: 676 print("config:", file=debug_out) 677 print("psyco=", have_psyco, file=debug_out) 678 for k,v in list(conf.items()): 679 print("%s=%s" % (k,v), file=debug_out) 680 681 m = None 682 for a in args: 683 if a.endswith('.meta'): 684 if not os.path.exists(a): 685 continue 686 m = klass(a, conf) 687 elif a.startswith('@'): 688 # there can actually multiple files per line 689 for line in open(a[1:]): 690 for f in line.strip().split(): 691 if not os.path.exists(f): 692 continue 693 m = klass(f, conf) 694 695 if output and m: 696 print(m.dirdeps()) 697 698 print(m.src_dirdeps('\nsrc:')) 699 700 dpdeps = getv(conf, 'DPDEPS') 701 if dpdeps: 702 m.file_depends(open(dpdeps, 'wb')) 703 704 return m 705 706if __name__ == '__main__': 707 try: 708 main(sys.argv) 709 except: 710 # yes, this goes to stdout 711 print("ERROR: ", sys.exc_info()[1]) 712 raise 713 714