#!/usr/bin/env python
"""
This script parses each "meta" file and extracts the
information needed to deduce build and src dependencies.

It works much the same as the original shell script, but is
*much* more efficient.

The parsing work is handled by the class MetaFile.
We only pay attention to a subset of the information in the
"meta" files.  Specifically:

'CWD'   to initialize our notion.

'C'     to track chdir(2) on a per process basis

'R'     files read are what we really care about.
        directories read, provide a clue to resolving
        subsequent relative paths.  That is if we cannot find
        them relative to 'cwd', we check relative to the last
        dir read.

'W'     files opened for write or read-write,
        for filemon V3 and earlier.

'E'     files executed.

'L'     files linked

'V'     the filemon version, this record is used as a clue
        that we have reached the interesting bit.

"""

from __future__ import print_function

"""
RCSid:
        $FreeBSD$
        $Id: meta2deps.py,v 1.24 2017/02/08 22:17:10 sjg Exp $

        Copyright (c) 2011-2013, Juniper Networks, Inc.
        All rights reserved.

        Redistribution and use in source and binary forms, with or without
        modification, are permitted provided that the following conditions
        are met:
        1. Redistributions of source code must retain the above copyright
           notice, this list of conditions and the following disclaimer.
        2. Redistributions in binary form must reproduce the above copyright
           notice, this list of conditions and the following disclaimer in the
           documentation and/or other materials provided with the distribution.

        THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
        "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
        LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
        A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
        OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
        SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
        LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
        DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
        THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
        (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
        OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""

import os
import re
import sys


def getv(dict, key, d=None):
    """Lookup key in dict and return value or the supplied default."""
    if key in dict:
        return dict[key]
    return d


def resolve(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Return an absolute path, resolving via cwd or last_dir if needed.

    Absolute paths, '.' and './...' are resolved textually.  Any other
    relative path is looked for (os.path.exists) under last_dir and then
    cwd; the first candidate that exists is returned.  Returns None when
    no candidate exists.
    """
    if path.endswith('/.'):
        # '/.' on its own is the root directory, not an empty path
        path = path[0:-2] or '/'
    if len(path) > 0 and path[0] == '/':
        return path
    if path == '.':
        return cwd
    if path.startswith('./'):
        return cwd + path[1:]
    if last_dir == cwd:
        # no point probing the same directory twice
        last_dir = None
    for d in [last_dir, cwd]:
        if not d:
            continue
        p = '/'.join([d, path])
        if debug > 2:
            print("looking for:", p, end=' ', file=debug_out)
        if not os.path.exists(p):
            if debug > 2:
                print("nope", file=debug_out)
            continue
        if debug > 2:
            print("found:", p, file=debug_out)
        return p
    return None


def cleanpath(path):
    """
    cleanup path without using realpath(3)

    Empty and '.' components are dropped and 'x/..' pairs are collapsed.
    A '..' with nothing left to cancel is dropped for an absolute path
    (we are already at '/') and kept for a relative one.
    """
    if path.startswith('/'):
        r = '/'
    else:
        r = ''
    p = []
    for d in path.split('/'):
        if not d or d == '.':
            continue
        if d == '..':
            if p and p[-1] != '..':
                p.pop()
            elif not r:
                # relative path climbing above its start: keep the '..'
                p.append(d)
            continue
        p.append(d)

    return r + '/'.join(p)


def abspath(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Return an absolute path, resolving via cwd or last_dir if needed.
    this gets called a lot, so we try to avoid calling realpath.

    If resolve() cannot find the path, the input is used as is.
    The result is passed through cleanpath() only when it looks like
    it needs it.
    """
    rpath = resolve(path, cwd, last_dir, debug, debug_out)
    if rpath:
        path = rpath
    if (path.find('/') < 0 or
        path.find('./') > 0 or
        path.endswith('/..')):
        path = cleanpath(path)
    return path


def sort_unique(list, cmp=None, key=None, reverse=False):
    """
    Sort list in place and return a new list without adjacent duplicates.

    cmp is an old style comparison function; it is adapted with
    functools.cmp_to_key since list.sort() no longer accepts it
    (nor positional arguments) in Python 3.
    """
    if cmp is not None:
        from functools import cmp_to_key
        ckey = cmp_to_key(cmp)
        if key is None:
            key = ckey
        else:
            # cmp compares the values produced by key
            kfunc = key
            key = lambda x: ckey(kfunc(x))
    list.sort(key=key, reverse=reverse)
    nl = []
    for e in list:
        # compare with the last element kept, so that a None element
        # is not mistaken for the "nothing seen yet" state
        if nl and e == nl[-1]:
            continue
        nl.append(e)
    return nl


def add_trims(x):
    """Return the suffixes ('/x/', '/x', 'x/', 'x') to trim from an objroot."""
    return ['/' + x + '/',
            '/' + x,
            x + '/',
            x]


class MetaFile:
    """class to parse meta files generated by bmake."""

    # These are class attributes.  The lists and dict are shared by all
    # instances until reset() rebinds them on an instance.
    conf = None
    dirdep_re = None
    host_target = None
    srctops = []
    objroots = []
    excludes = []
    seen = {}
    obj_deps = []
    src_deps = []
    file_deps = []

    def __init__(self, name, conf=None):
        """if name is set we will parse it now.
        conf can have the following keys:

        SRCTOPS list of tops of the src tree(s).

        CURDIR  the src directory 'bmake' was run from.

        RELDIR  the relative path from SRCTOP to CURDIR

        MACHINE the machine we built for.
                set to 'none' if we are not cross-building.
                More specifically if machine cannot be deduced from objdirs.

        TARGET_SPEC
                Sometimes MACHINE isn't enough.

        HOST_TARGET
                when we build for the pseudo machine 'host'
                the object tree uses HOST_TARGET rather than MACHINE.

        OBJROOTS a list of the common prefix for all obj dirs it might
                end in '/' or '-'.

        DPDEPS  names an optional file to which per file dependencies
                will be appended.
                For example if 'some/path/foo.h' is read from SRCTOP
                then 'DPDEPS_some/path/foo.h +=' "RELDIR" is output.
                This can allow 'bmake' to learn all the dirs within
                the tree that depend on 'foo.h'

        EXCLUDES
                A list of paths to ignore.
                ccache(1) can otherwise be trouble.

        debug   desired debug level

        debug_out open file to send debug output to (sys.stderr)

        """

        if conf is None:
            conf = {}

        self.name = name
        self.debug = getv(conf, 'debug', 0)
        self.debug_out = getv(conf, 'debug_out', sys.stderr)

        self.machine = getv(conf, 'MACHINE', '')
        self.machine_arch = getv(conf, 'MACHINE_ARCH', '')
        self.target_spec = getv(conf, 'TARGET_SPEC', '')
        self.curdir = getv(conf, 'CURDIR')
        self.reldir = getv(conf, 'RELDIR')
        self.dpdeps = getv(conf, 'DPDEPS')
        self.line = 0

        # NOTE: self.conf is assigned on the instance below, so a fresh
        # instance still sees the class attribute (None) here and this
        # branch runs again; the membership tests keep the shared
        # srctops/objroots lists free of duplicates.
        if not self.conf:
            # some of the steps below we want to do only once
            self.conf = conf
            self.host_target = getv(conf, 'HOST_TARGET')
            for srctop in getv(conf, 'SRCTOPS', []):
                if not srctop:
                    continue
                if srctop[-1] != '/':
                    srctop += '/'
                if not srctop in self.srctops:
                    self.srctops.append(srctop)
                _srctop = os.path.realpath(srctop)
                if _srctop[-1] != '/':
                    _srctop += '/'
                if not _srctop in self.srctops:
                    self.srctops.append(_srctop)

            # Only build trims from non-empty names: an empty suffix
            # matches every objroot and objroot[0:-0] would empty it.
            trim_list = []
            if self.machine:
                trim_list += add_trims(self.machine)
            if self.machine == 'host' and self.host_target:
                trim_list += add_trims(self.host_target)
            if self.target_spec:
                trim_list += add_trims(self.target_spec)

            for objroot in getv(conf, 'OBJROOTS', []):
                for e in trim_list:
                    if objroot.endswith(e):
                        # this is not what we want - fix it
                        objroot = objroot[0:-len(e)]

                if not objroot:
                    continue
                if objroot[-1] != '/':
                    objroot += '/'
                if not objroot in self.objroots:
                    self.objroots.append(objroot)
                    _objroot = os.path.realpath(objroot)
                    if _objroot[-1] != '/':
                        _objroot += '/'
                    if not _objroot in self.objroots:
                        self.objroots.append(_objroot)

            # we want the longest match
            self.srctops.sort(reverse=True)
            self.objroots.sort(reverse=True)

            self.excludes = getv(conf, 'EXCLUDES', [])

            if self.debug:
                print("host_target=", self.host_target, file=self.debug_out)
                print("srctops=", self.srctops, file=self.debug_out)
                print("objroots=", self.objroots, file=self.debug_out)
                print("excludes=", self.excludes, file=self.debug_out)

            self.dirdep_re = re.compile(r'([^/]+)/(.+)')

        if self.dpdeps and not self.reldir:
            if self.debug:
                print("need reldir:", end=' ', file=self.debug_out)
            if self.curdir:
                srctop = self.find_top(self.curdir, self.srctops)
                if srctop:
                    self.reldir = self.curdir.replace(srctop, '')
                    if self.debug:
                        print(self.reldir, file=self.debug_out)
            if not self.reldir:
                self.dpdeps = None      # we cannot do it?

        self.cwd = os.getcwd()          # make sure this is initialized
        self.last_dir = self.cwd

        if name:
            self.try_parse()

    def reset(self):
        """reset state if we are being passed meta files from multiple directories."""
        self.seen = {}
        self.obj_deps = []
        self.src_deps = []
        self.file_deps = []

    def dirdeps(self, sep='\n'):
        """return DIRDEPS"""
        return sep.strip() + sep.join(self.obj_deps)

    def src_dirdeps(self, sep='\n'):
        """return SRC_DIRDEPS"""
        return sep.strip() + sep.join(self.src_deps)

    def file_depends(self, out=None):
        """Append DPDEPS_${file} += ${RELDIR}
        for each file we saw, to the output file.

        Nothing is written when reldir is not known.
        out=None sends the output to stdout."""
        if not self.reldir:
            return None
        for f in sort_unique(self.file_deps):
            print('DPDEPS_%s += %s' % (f, self.reldir), file=out)
        # these entries provide for reverse DIRDEPS lookup
        for f in self.obj_deps:
            print('DEPDIRS_%s += %s' % (f, self.reldir), file=out)

    def seenit(self, dir):
        """remember that we have seen dir."""
        self.seen[dir] = 1

    def add(self, list, data, clue=''):
        """add data to list if it isn't already there."""
        if data not in list:
            list.append(data)
            if self.debug:
                print("%s: %sAdd: %s" % (self.name, clue, data), file=self.debug_out)

    def find_top(self, path, list):
        """the logical tree may be split across multiple trees

        Return the first entry of list that path starts with, or None."""
        for top in list:
            if path.startswith(top):
                if self.debug > 2:
                    print("found in", top, file=self.debug_out)
                return top
        return None

    def find_obj(self, objroot, dir, path, input):
        """return path within objroot, taking care of .dirdep files"""
        ddep = None
        for ddepf in [path + '.dirdep', dir + '/.dirdep']:
            if not ddep and os.path.exists(ddepf):
                with open(ddepf, 'r') as df:
                    ddep = df.readline().strip('# \n')
                if self.debug > 1:
                    print("found %s: %s\n" % (ddepf, ddep), file=self.debug_out)
                # drop a trailing '.MACHINE' (or '.TARGET_SPEC'); an empty
                # machine must not match or we would chop a character
                if self.machine and ddep.endswith(self.machine):
                    ddep = ddep[0:-(1+len(self.machine))]
                elif self.target_spec and ddep.endswith(self.target_spec):
                    ddep = ddep[0:-(1+len(self.target_spec))]

        if not ddep:
            # no .dirdeps, so remember that we've seen the raw input
            self.seenit(input)
            self.seenit(dir)
            if self.machine == 'none':
                if dir.startswith(objroot):
                    return dir.replace(objroot, '')
                return None
            # what follows objroot is expected to be 'machine/reldir'
            m = self.dirdep_re.match(dir.replace(objroot, ''))
            if m:
                ddep = m.group(2)
                dmachine = m.group(1)
                if dmachine != self.machine:
                    if not (self.machine == 'host' and
                            dmachine == self.host_target):
                        if self.debug > 2:
                            print("adding .%s to %s" % (dmachine, ddep), file=self.debug_out)
                        ddep += '.' + dmachine

        return ddep

    def try_parse(self, name=None, file=None):
        """give file and line number causing exception"""
        try:
            self.parse(name, file)
        except:
            # give a useful clue
            print('{}:{}: '.format(self.name, self.line), end=' ', file=sys.stderr)
            raise

    def parse(self, name=None, file=None):
        """A meta file looks like:

        # Meta data file "path"
        CMD "command-line"
        CWD "cwd"
        TARGET "target"
        -- command output --
        -- filemon acquired metadata --
        # buildmon version 3
        V 3
        C "pid" "cwd"
        E "pid" "path"
        F "pid" "child"
        R "pid" "path"
        W "pid" "path"
        X "pid" "status"
        D "pid" "path"
        L "pid" "src" "target"
        M "pid" "old" "new"
        S "pid" "path"
        # Bye bye

        We go to some effort to avoid processing a dependency more than once.
        Of the above record types only C,E,F,L,R and V are processed;
        the handling of 'W' records for filemon versions before 4
        is currently disabled.

        name replaces self.name when given.
        file is an already open file (or any iterable of lines) to read
        instead of opening self.name; it is not closed here.
        """

        version = 0                     # unknown
        if name:
            self.name = name
        # make sure cwd is bound even if there is no CWD record
        cwd = self.cwd
        if file:
            f = file
            cwd = self.last_dir = self.cwd
        else:
            f = open(self.name, 'r')
        try:
            skip = True
            pid_cwd = {}
            pid_last_dir = {}
            last_pid = 0

            self.line = 0
            if self.curdir:
                self.seenit(self.curdir)    # we ignore this

            interesting = 'CEFLRV'
            for line in f:
                self.line += 1
                # ignore anything we don't care about
                if not line or line[0] not in interesting:
                    continue
                if self.debug > 2:
                    print("input:", line, end=' ', file=self.debug_out)
                w = line.split()

                if skip:
                    # everything before the 'V' record is header
                    if w[0] == 'V':
                        skip = False
                        version = int(w[1])
                        # disabled:
                        # if version < 4:
                        #     # we cannot ignore 'W' records
                        #     # as they may be 'rw'
                        #     interesting += 'W'
                    elif w[0] == 'CWD':
                        self.cwd = cwd = self.last_dir = w[1]
                        self.seenit(cwd)    # ignore this
                        if self.debug:
                            print("%s: CWD=%s" % (self.name, cwd), file=self.debug_out)
                    continue

                pid = int(w[1])
                if pid != last_pid:
                    # switch to the cwd and last_dir of this process
                    if last_pid:
                        pid_last_dir[last_pid] = self.last_dir
                    cwd = getv(pid_cwd, pid, self.cwd)
                    self.last_dir = getv(pid_last_dir, pid, self.cwd)
                    last_pid = pid

                # process operations
                if w[0] == 'F':
                    # the child starts out in the cwd of its parent
                    npid = int(w[2])
                    pid_cwd[npid] = cwd
                    pid_last_dir[npid] = cwd
                    last_pid = npid
                    continue
                elif w[0] == 'C':
                    cwd = abspath(w[2], cwd, None, self.debug, self.debug_out)
                    if cwd.endswith('/.'):
                        cwd = cwd[0:-2]
                    self.last_dir = pid_last_dir[pid] = cwd
                    pid_cwd[pid] = cwd
                    if self.debug > 1:
                        print("cwd=", cwd, file=self.debug_out)
                    continue

                if w[2] in self.seen:
                    if self.debug > 2:
                        print("seen:", w[2], file=self.debug_out)
                    continue
                # file operations
                if w[0] in 'ML':
                    # these are special, treat src as read and
                    # target as write; w[1] is the pid, so the
                    # paths are w[2] and w[3]
                    self.parse_path(w[2].strip("'"), cwd, 'R', w)
                    self.parse_path(w[3].strip("'"), cwd, 'W', w)
                    continue
                elif w[0] in 'ERWS':
                    path = w[2]
                    self.parse_path(path, cwd, w[0], w)
        finally:
            # only close what we opened, even if parsing failed
            if not file:
                f.close()

    def is_src(self, base, dir, rdir):
        """is base in srctop

        dir and rdir (which may be None) are tried in turn; on the first
        one found below a srctop the src dependency (and the file
        dependency when dpdeps is set) is recorded, and True is returned."""
        for dir in [dir, rdir]:
            if not dir:
                continue
            path = '/'.join([dir, base])
            srctop = self.find_top(path, self.srctops)
            if srctop:
                if self.dpdeps:
                    self.add(self.file_deps, path.replace(srctop, ''), 'file')
                self.add(self.src_deps, dir.replace(srctop, ''), 'src')
                self.seenit(dir)
                return True
        return False

    def parse_path(self, path, cwd, op=None, w=None):
        """look at a path for the op specified

        w is the split record the path came from; w[0] supplies op when
        op is not given, and w[2] is the raw input we remember as seen
        (path itself is used when w has no such field)."""

        if w is None:
            w = []
        if not op:
            op = w[0]
        if len(w) > 2:
            raw = w[2]
        else:
            raw = path

        # we are never interested in .dirdep files as dependencies
        if path.endswith('.dirdep'):
            return
        for p in self.excludes:
            if p and path.startswith(p):
                if self.debug > 2:
                    print("exclude:", p, path, file=self.debug_out)
                return
        # we don't want to resolve the last component if it is
        # a symlink
        path = resolve(path, cwd, self.last_dir, self.debug, self.debug_out)
        if not path:
            return
        dir, base = os.path.split(path)
        if dir in self.seen:
            if self.debug > 2:
                print("seen:", dir, file=self.debug_out)
            return
        # we can have a path in an objdir which is a link
        # to the src dir, we may need to add dependencies for each
        dir = abspath(dir, cwd, self.last_dir, self.debug, self.debug_out)
        rdir = os.path.realpath(dir)
        if rdir == dir:
            rdir = None
        # now put path back together
        path = '/'.join([dir, base])
        if self.debug > 1:
            print("raw=%s rdir=%s dir=%s path=%s" % (raw, rdir, dir, path), file=self.debug_out)
        if op in 'RWS':
            if path in [self.last_dir, cwd, self.cwd, self.curdir]:
                if self.debug > 1:
                    print("skipping:", path, file=self.debug_out)
                return
            if os.path.isdir(path):
                if op in 'RW':
                    # a directory read is a clue for later relative paths
                    self.last_dir = path
                if self.debug > 1:
                    print("ldir=", self.last_dir, file=self.debug_out)
                return

        if op in 'ERW':
            # finally, we get down to it
            if dir == self.cwd or dir == self.curdir:
                return
            if self.is_src(base, dir, rdir):
                self.seenit(raw)
                if not rdir:
                    return

            objroot = None
            # NOTE: after this loop dir is the candidate that matched an
            # objroot, or the last one examined (rdir, possibly None).
            for dir in [dir, rdir]:
                if not dir:
                    continue
                objroot = self.find_top(dir, self.objroots)
                if objroot:
                    break
            if objroot:
                ddep = self.find_obj(objroot, dir, path, raw)
                if ddep:
                    self.add(self.obj_deps, ddep, 'obj')
                    if self.dpdeps and objroot.endswith('/stage/'):
                        sp = '/'.join(path.replace(objroot, '').split('/')[1:])
                        self.add(self.file_deps, sp, 'file')
            else:
                # don't waste time looking again
                self.seenit(raw)
                self.seenit(dir)


def main(argv, klass=MetaFile, xopts='', xoptf=None):
    """Simple driver for class MetaFile.

    Usage:
        script [options] [key=value ...] "meta" ...

    Options and key=value pairs contribute to the
    dictionary passed to MetaFile.

    -S "SRCTOP"
                add "SRCTOP" to the "SRCTOPS" list.

    -C "CURDIR"

    -O "OBJROOT"
                add "OBJROOT" to the "OBJROOTS" list.

    -R "RELDIR"

    -m "MACHINE"

    -a "MACHINE_ARCH"

    -H "HOST_TARGET"

    -T "TARGET_SPEC"

    -D "DPDEPS"

    -X "EXCLUDE"
                add "EXCLUDE" to the "EXCLUDES" list.

    -q          do not print the results

    -d          bumps debug level

    xopts is appended to the getopt option string and any option not
    handled here is passed to xoptf(option, value, conf).

    Arguments ending in '.meta' are parsed; an argument starting with
    '@' names a file listing meta files.  Files that do not exist are
    skipped.  Returns the last klass instance created, or None.
    """
    import getopt

    # import Psyco if we can
    # it can speed things up quite a bit
    have_psyco = 0
    try:
        import psyco
        psyco.full()
        have_psyco = 1
    except Exception:
        pass

    conf = {
        'SRCTOPS': [],
        'OBJROOTS': [],
        'EXCLUDES': [],
        }

    # Look at each variable on its own so that one which is not set
    # does not stop us picking up the others.
    machine = os.environ.get('MACHINE')
    if machine:
        conf['MACHINE'] = machine
    machine_arch = os.environ.get('MACHINE_ARCH')
    if machine_arch:
        conf['MACHINE_ARCH'] = machine_arch
    srctop = os.environ.get('SB_SRC')
    if srctop:
        conf['SRCTOPS'].append(srctop)
    objroot = os.environ.get('SB_OBJROOT')
    if objroot:
        conf['OBJROOTS'].append(objroot)

    debug = 0
    output = True

    opts, args = getopt.getopt(argv[1:], 'a:dS:C:O:R:m:D:H:qT:X:' + xopts)
    for o, a in opts:
        if o == '-a':
            conf['MACHINE_ARCH'] = a
        elif o == '-d':
            debug += 1
        elif o == '-q':
            output = False
        elif o == '-H':
            conf['HOST_TARGET'] = a
        elif o == '-S':
            if a not in conf['SRCTOPS']:
                conf['SRCTOPS'].append(a)
        elif o == '-C':
            conf['CURDIR'] = a
        elif o == '-O':
            if a not in conf['OBJROOTS']:
                conf['OBJROOTS'].append(a)
        elif o == '-R':
            conf['RELDIR'] = a
        elif o == '-D':
            conf['DPDEPS'] = a
        elif o == '-m':
            conf['MACHINE'] = a
        elif o == '-T':
            conf['TARGET_SPEC'] = a
        elif o == '-X':
            if a not in conf['EXCLUDES']:
                conf['EXCLUDES'].append(a)
        elif xoptf:
            xoptf(o, a, conf)

    conf['debug'] = debug

    # get any var=val assignments
    eaten = []
    for a in args:
        if a.find('=') > 0:
            # only split on the first '=' so the value may contain one
            k, v = a.split('=', 1)
            if k in ['SRCTOP', 'OBJROOT', 'SRCTOPS', 'OBJROOTS']:
                if k == 'SRCTOP':
                    k = 'SRCTOPS'
                elif k == 'OBJROOT':
                    k = 'OBJROOTS'
                if v not in conf[k]:
                    conf[k].append(v)
            else:
                conf[k] = v
            eaten.append(a)
            continue
        # assignments stop at the first argument that is not one
        break

    for a in eaten:
        args.remove(a)

    debug_out = getv(conf, 'debug_out', sys.stderr)

    if debug:
        print("config:", file=debug_out)
        print("psyco=", have_psyco, file=debug_out)
        for k, v in conf.items():
            print("%s=%s" % (k, v), file=debug_out)

    m = None
    for a in args:
        if a.endswith('.meta'):
            if not os.path.exists(a):
                continue
            m = klass(a, conf)
        elif a.startswith('@'):
            # there can actually multiple files per line
            with open(a[1:]) as lf:
                for line in lf:
                    for f in line.strip().split():
                        if not os.path.exists(f):
                            continue
                        m = klass(f, conf)

    if output and m:
        print(m.dirdeps())

        print(m.src_dirdeps('\nsrc:'))

        dpdeps = getv(conf, 'DPDEPS')
        if dpdeps:
            # text mode: file_depends() uses print() which needs str
            with open(dpdeps, 'w') as df:
                m.file_depends(df)

    return m


if __name__ == '__main__':
    try:
        main(sys.argv)
    except:
        # yes, this goes to stdout
        print("ERROR: ", sys.exc_info()[1])
        raise