1#!/usr/bin/env python 2 3from __future__ import print_function 4 5""" 6This script parses each "meta" file and extracts the 7information needed to deduce build and src dependencies. 8 9It works much the same as the original shell script, but is 10*much* more efficient. 11 12The parsing work is handled by the class MetaFile. 13We only pay attention to a subset of the information in the 14"meta" files. Specifically: 15 16'CWD' to initialize our notion. 17 18'C' to track chdir(2) on a per process basis 19 20'R' files read are what we really care about. 21 directories read, provide a clue to resolving 22 subsequent relative paths. That is if we cannot find 23 them relative to 'cwd', we check relative to the last 24 dir read. 25 26'W' files opened for write or read-write, 27 for filemon V3 and earlier. 28 29'E' files executed. 30 31'L' files linked 32 33'V' the filemon version, this record is used as a clue 34 that we have reached the interesting bit. 35 36""" 37 38""" 39RCSid: 40 $FreeBSD$ 41 $Id: meta2deps.py,v 1.22 2016/12/12 19:07:42 sjg Exp $ 42 43 Copyright (c) 2011-2013, Juniper Networks, Inc. 44 All rights reserved. 45 46 Redistribution and use in source and binary forms, with or without 47 modification, are permitted provided that the following conditions 48 are met: 49 1. Redistributions of source code must retain the above copyright 50 notice, this list of conditions and the following disclaimer. 51 2. Redistributions in binary form must reproduce the above copyright 52 notice, this list of conditions and the following disclaimer in the 53 documentation and/or other materials provided with the distribution. 54 55 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 56 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 57 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 58 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 59 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 60 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 61 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 62 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 63 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 64 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 65 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 66 67""" 68 69import os, re, sys 70 71def getv(dict, key, d=None): 72 """Lookup key in dict and return value or the supplied default.""" 73 if key in dict: 74 return dict[key] 75 return d 76 77def resolve(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr): 78 """ 79 Return an absolute path, resolving via cwd or last_dir if needed. 80 """ 81 if path.endswith('/.'): 82 path = path[0:-2] 83 if len(path) > 0 and path[0] == '/': 84 return path 85 if path == '.': 86 return cwd 87 if path.startswith('./'): 88 return cwd + path[1:] 89 if last_dir == cwd: 90 last_dir = None 91 for d in [last_dir, cwd]: 92 if not d: 93 continue 94 p = '/'.join([d,path]) 95 if debug > 2: 96 print("looking for:", p, end=' ', file=debug_out) 97 if not os.path.exists(p): 98 if debug > 2: 99 print("nope", file=debug_out) 100 p = None 101 continue 102 if debug > 2: 103 print("found:", p, file=debug_out) 104 return p 105 return None 106 107def abspath(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr): 108 """ 109 Return an absolute path, resolving via cwd or last_dir if needed. 110 this gets called a lot, so we try to avoid calling realpath 111 until we know we have something. 112 """ 113 rpath = resolve(path, cwd, last_dir, debug, debug_out) 114 if rpath: 115 path = rpath 116 if (path.find('/') < 0 or 117 path.find('./') > 0 or 118 path.endswith('/..') or 119 os.path.islink(path)): 120 return os.path.realpath(path) 121 return path 122 123def sort_unique(list, cmp=None, key=None, reverse=False): 124 list.sort(cmp, key, reverse) 125 nl = [] 126 le = None 127 for e in list: 128 if e == le: 129 continue 130 nl.append(e) 131 return nl 132 133def add_trims(x): 134 return ['/' + x + '/', 135 '/' + x, 136 x + '/', 137 x] 138 139class MetaFile: 140 """class to parse meta files generated by bmake.""" 141 142 conf = None 143 dirdep_re = None 144 host_target = None 145 srctops = [] 146 objroots = [] 147 excludes = [] 148 seen = {} 149 obj_deps = [] 150 src_deps = [] 151 file_deps = [] 152 153 def __init__(self, name, conf={}): 154 """if name is set we will parse it now. 155 conf can have the follwing keys: 156 157 SRCTOPS list of tops of the src tree(s). 158 159 CURDIR the src directory 'bmake' was run from. 160 161 RELDIR the relative path from SRCTOP to CURDIR 162 163 MACHINE the machine we built for. 164 set to 'none' if we are not cross-building. 165 More specifically if machine cannot be deduced from objdirs. 166 167 TARGET_SPEC 168 Sometimes MACHINE isn't enough. 169 170 HOST_TARGET 171 when we build for the pseudo machine 'host' 172 the object tree uses HOST_TARGET rather than MACHINE. 173 174 OBJROOTS a list of the common prefix for all obj dirs it might 175 end in '/' or '-'. 176 177 DPDEPS names an optional file to which per file dependencies 178 will be appended. 179 For example if 'some/path/foo.h' is read from SRCTOP 180 then 'DPDEPS_some/path/foo.h +=' "RELDIR" is output. 181 This can allow 'bmake' to learn all the dirs within 182 the tree that depend on 'foo.h' 183 184 EXCLUDES 185 A list of paths to ignore. 186 ccache(1) can otherwise be trouble. 187 188 debug desired debug level 189 190 debug_out open file to send debug output to (sys.stderr) 191 192 """ 193 194 self.name = name 195 self.debug = getv(conf, 'debug', 0) 196 self.debug_out = getv(conf, 'debug_out', sys.stderr) 197 198 self.machine = getv(conf, 'MACHINE', '') 199 self.machine_arch = getv(conf, 'MACHINE_ARCH', '') 200 self.target_spec = getv(conf, 'TARGET_SPEC', '') 201 self.curdir = getv(conf, 'CURDIR') 202 self.reldir = getv(conf, 'RELDIR') 203 self.dpdeps = getv(conf, 'DPDEPS') 204 self.line = 0 205 206 if not self.conf: 207 # some of the steps below we want to do only once 208 self.conf = conf 209 self.host_target = getv(conf, 'HOST_TARGET') 210 for srctop in getv(conf, 'SRCTOPS', []): 211 if srctop[-1] != '/': 212 srctop += '/' 213 if not srctop in self.srctops: 214 self.srctops.append(srctop) 215 _srctop = os.path.realpath(srctop) 216 if _srctop[-1] != '/': 217 _srctop += '/' 218 if not _srctop in self.srctops: 219 self.srctops.append(_srctop) 220 221 trim_list = add_trims(self.machine) 222 if self.machine == 'host': 223 trim_list += add_trims(self.host_target) 224 if self.target_spec: 225 trim_list += add_trims(self.target_spec) 226 227 for objroot in getv(conf, 'OBJROOTS', []): 228 for e in trim_list: 229 if objroot.endswith(e): 230 # this is not what we want - fix it 231 objroot = objroot[0:-len(e)] 232 233 if objroot[-1] != '/': 234 objroot += '/' 235 if not objroot in self.objroots: 236 self.objroots.append(objroot) 237 _objroot = os.path.realpath(objroot) 238 if objroot[-1] == '/': 239 _objroot += '/' 240 if not _objroot in self.objroots: 241 self.objroots.append(_objroot) 242 243 # we want the longest match 244 self.srctops.sort(reverse=True) 245 self.objroots.sort(reverse=True) 246 247 self.excludes = getv(conf, 'EXCLUDES', []) 248 249 if self.debug: 250 print("host_target=", self.host_target, file=self.debug_out) 251 print("srctops=", self.srctops, file=self.debug_out) 252 print("objroots=", self.objroots, file=self.debug_out) 253 print("excludes=", self.excludes, file=self.debug_out) 254 255 self.dirdep_re = re.compile(r'([^/]+)/(.+)') 256 257 if self.dpdeps and not self.reldir: 258 if self.debug: 259 print("need reldir:", end=' ', file=self.debug_out) 260 if self.curdir: 261 srctop = self.find_top(self.curdir, self.srctops) 262 if srctop: 263 self.reldir = self.curdir.replace(srctop,'') 264 if self.debug: 265 print(self.reldir, file=self.debug_out) 266 if not self.reldir: 267 self.dpdeps = None # we cannot do it? 268 269 self.cwd = os.getcwd() # make sure this is initialized 270 self.last_dir = self.cwd 271 272 if name: 273 self.try_parse() 274 275 def reset(self): 276 """reset state if we are being passed meta files from multiple directories.""" 277 self.seen = {} 278 self.obj_deps = [] 279 self.src_deps = [] 280 self.file_deps = [] 281 282 def dirdeps(self, sep='\n'): 283 """return DIRDEPS""" 284 return sep.strip() + sep.join(self.obj_deps) 285 286 def src_dirdeps(self, sep='\n'): 287 """return SRC_DIRDEPS""" 288 return sep.strip() + sep.join(self.src_deps) 289 290 def file_depends(self, out=None): 291 """Append DPDEPS_${file} += ${RELDIR} 292 for each file we saw, to the output file.""" 293 if not self.reldir: 294 return None 295 for f in sort_unique(self.file_deps): 296 print('DPDEPS_%s += %s' % (f, self.reldir), file=out) 297 # these entries provide for reverse DIRDEPS lookup 298 for f in self.obj_deps: 299 print('DEPDIRS_%s += %s' % (f, self.reldir), file=out) 300 301 def seenit(self, dir): 302 """rememer that we have seen dir.""" 303 self.seen[dir] = 1 304 305 def add(self, list, data, clue=''): 306 """add data to list if it isn't already there.""" 307 if data not in list: 308 list.append(data) 309 if self.debug: 310 print("%s: %sAdd: %s" % (self.name, clue, data), file=self.debug_out) 311 312 def find_top(self, path, list): 313 """the logical tree may be split across multiple trees""" 314 for top in list: 315 if path.startswith(top): 316 if self.debug > 2: 317 print("found in", top, file=self.debug_out) 318 return top 319 return None 320 321 def find_obj(self, objroot, dir, path, input): 322 """return path within objroot, taking care of .dirdep files""" 323 ddep = None 324 for ddepf in [path + '.dirdep', dir + '/.dirdep']: 325 if not ddep and os.path.exists(ddepf): 326 ddep = open(ddepf, 'r').readline().strip('# \n') 327 if self.debug > 1: 328 print("found %s: %s\n" % (ddepf, ddep), file=self.debug_out) 329 if ddep.endswith(self.machine): 330 ddep = ddep[0:-(1+len(self.machine))] 331 elif self.target_spec and ddep.endswith(self.target_spec): 332 ddep = ddep[0:-(1+len(self.target_spec))] 333 334 if not ddep: 335 # no .dirdeps, so remember that we've seen the raw input 336 self.seenit(input) 337 self.seenit(dir) 338 if self.machine == 'none': 339 if dir.startswith(objroot): 340 return dir.replace(objroot,'') 341 return None 342 m = self.dirdep_re.match(dir.replace(objroot,'')) 343 if m: 344 ddep = m.group(2) 345 dmachine = m.group(1) 346 if dmachine != self.machine: 347 if not (self.machine == 'host' and 348 dmachine == self.host_target): 349 if self.debug > 2: 350 print("adding .%s to %s" % (dmachine, ddep), file=self.debug_out) 351 ddep += '.' + dmachine 352 353 return ddep 354 355 def try_parse(self, name=None, file=None): 356 """give file and line number causing exception""" 357 try: 358 self.parse(name, file) 359 except: 360 # give a useful clue 361 print('{}:{}: '.format(self.name, self.line), end=' ', file=sys.stderr) 362 raise 363 364 def parse(self, name=None, file=None): 365 """A meta file looks like: 366 367 # Meta data file "path" 368 CMD "command-line" 369 CWD "cwd" 370 TARGET "target" 371 -- command output -- 372 -- filemon acquired metadata -- 373 # buildmon version 3 374 V 3 375 C "pid" "cwd" 376 E "pid" "path" 377 F "pid" "child" 378 R "pid" "path" 379 W "pid" "path" 380 X "pid" "status" 381 D "pid" "path" 382 L "pid" "src" "target" 383 M "pid" "old" "new" 384 S "pid" "path" 385 # Bye bye 386 387 We go to some effort to avoid processing a dependency more than once. 388 Of the above record types only C,E,F,L,R,V and W are of interest. 389 """ 390 391 version = 0 # unknown 392 if name: 393 self.name = name; 394 if file: 395 f = file 396 cwd = self.last_dir = self.cwd 397 else: 398 f = open(self.name, 'r') 399 skip = True 400 pid_cwd = {} 401 pid_last_dir = {} 402 last_pid = 0 403 404 self.line = 0 405 if self.curdir: 406 self.seenit(self.curdir) # we ignore this 407 408 interesting = 'CEFLRV' 409 for line in f: 410 self.line += 1 411 # ignore anything we don't care about 412 if not line[0] in interesting: 413 continue 414 if self.debug > 2: 415 print("input:", line, end=' ', file=self.debug_out) 416 w = line.split() 417 418 if skip: 419 if w[0] == 'V': 420 skip = False 421 version = int(w[1]) 422 """ 423 if version < 4: 424 # we cannot ignore 'W' records 425 # as they may be 'rw' 426 interesting += 'W' 427 """ 428 elif w[0] == 'CWD': 429 self.cwd = cwd = self.last_dir = w[1] 430 self.seenit(cwd) # ignore this 431 if self.debug: 432 print("%s: CWD=%s" % (self.name, cwd), file=self.debug_out) 433 continue 434 435 pid = int(w[1]) 436 if pid != last_pid: 437 if last_pid: 438 pid_last_dir[last_pid] = self.last_dir 439 cwd = getv(pid_cwd, pid, self.cwd) 440 self.last_dir = getv(pid_last_dir, pid, self.cwd) 441 last_pid = pid 442 443 # process operations 444 if w[0] == 'F': 445 npid = int(w[2]) 446 pid_cwd[npid] = cwd 447 pid_last_dir[npid] = cwd 448 last_pid = npid 449 continue 450 elif w[0] == 'C': 451 cwd = abspath(w[2], cwd, None, self.debug, self.debug_out) 452 if cwd.endswith('/.'): 453 cwd = cwd[0:-2] 454 self.last_dir = pid_last_dir[pid] = cwd 455 pid_cwd[pid] = cwd 456 if self.debug > 1: 457 print("cwd=", cwd, file=self.debug_out) 458 continue 459 460 if w[2] in self.seen: 461 if self.debug > 2: 462 print("seen:", w[2], file=self.debug_out) 463 continue 464 # file operations 465 if w[0] in 'ML': 466 # these are special, tread src as read and 467 # target as write 468 self.parse_path(w[1].strip("'"), cwd, 'R', w) 469 self.parse_path(w[2].strip("'"), cwd, 'W', w) 470 continue 471 elif w[0] in 'ERWS': 472 path = w[2] 473 self.parse_path(path, cwd, w[0], w) 474 475 if not file: 476 f.close() 477 478 def parse_path(self, path, cwd, op=None, w=[]): 479 """look at a path for the op specified""" 480 481 if not op: 482 op = w[0] 483 484 # we are never interested in .dirdep files as dependencies 485 if path.endswith('.dirdep'): 486 return 487 for p in self.excludes: 488 if p and path.startswith(p): 489 if self.debug > 2: 490 print("exclude:", p, path, file=self.debug_out) 491 return 492 # we don't want to resolve the last component if it is 493 # a symlink 494 path = resolve(path, cwd, self.last_dir, self.debug, self.debug_out) 495 if not path: 496 return 497 dir,base = os.path.split(path) 498 if dir in self.seen: 499 if self.debug > 2: 500 print("seen:", dir, file=self.debug_out) 501 return 502 # we can have a path in an objdir which is a link 503 # to the src dir, we may need to add dependencies for each 504 rdir = dir 505 dir = abspath(dir, cwd, self.last_dir, self.debug, self.debug_out) 506 if rdir == dir or rdir.find('./') > 0: 507 rdir = None 508 # now put path back together 509 path = '/'.join([dir,base]) 510 if self.debug > 1: 511 print("raw=%s rdir=%s dir=%s path=%s" % (w[2], rdir, dir, path), file=self.debug_out) 512 if op in 'RWS': 513 if path in [self.last_dir, cwd, self.cwd, self.curdir]: 514 if self.debug > 1: 515 print("skipping:", path, file=self.debug_out) 516 return 517 if os.path.isdir(path): 518 if op in 'RW': 519 self.last_dir = path; 520 if self.debug > 1: 521 print("ldir=", self.last_dir, file=self.debug_out) 522 return 523 524 if op in 'ERW': 525 # finally, we get down to it 526 if dir == self.cwd or dir == self.curdir: 527 return 528 srctop = self.find_top(path, self.srctops) 529 if srctop: 530 if self.dpdeps: 531 self.add(self.file_deps, path.replace(srctop,''), 'file') 532 self.add(self.src_deps, dir.replace(srctop,''), 'src') 533 self.seenit(w[2]) 534 self.seenit(dir) 535 if rdir and not rdir.startswith(srctop): 536 dir = rdir # for below 537 rdir = None 538 else: 539 return 540 541 objroot = None 542 for dir in [dir,rdir]: 543 if not dir: 544 continue 545 objroot = self.find_top(dir, self.objroots) 546 if objroot: 547 break 548 if objroot: 549 ddep = self.find_obj(objroot, dir, path, w[2]) 550 if ddep: 551 self.add(self.obj_deps, ddep, 'obj') 552 if self.dpdeps and objroot.endswith('/stage/'): 553 sp = '/'.join(path.replace(objroot,'').split('/')[1:]) 554 self.add(self.file_deps, sp, 'file') 555 else: 556 # don't waste time looking again 557 self.seenit(w[2]) 558 self.seenit(dir) 559 560 561def main(argv, klass=MetaFile, xopts='', xoptf=None): 562 """Simple driver for class MetaFile. 563 564 Usage: 565 script [options] [key=value ...] "meta" ... 566 567 Options and key=value pairs contribute to the 568 dictionary passed to MetaFile. 569 570 -S "SRCTOP" 571 add "SRCTOP" to the "SRCTOPS" list. 572 573 -C "CURDIR" 574 575 -O "OBJROOT" 576 add "OBJROOT" to the "OBJROOTS" list. 577 578 -m "MACHINE" 579 580 -a "MACHINE_ARCH" 581 582 -H "HOST_TARGET" 583 584 -D "DPDEPS" 585 586 -d bumps debug level 587 588 """ 589 import getopt 590 591 # import Psyco if we can 592 # it can speed things up quite a bit 593 have_psyco = 0 594 try: 595 import psyco 596 psyco.full() 597 have_psyco = 1 598 except: 599 pass 600 601 conf = { 602 'SRCTOPS': [], 603 'OBJROOTS': [], 604 'EXCLUDES': [], 605 } 606 607 try: 608 machine = os.environ['MACHINE'] 609 if machine: 610 conf['MACHINE'] = machine 611 machine_arch = os.environ['MACHINE_ARCH'] 612 if machine_arch: 613 conf['MACHINE_ARCH'] = machine_arch 614 srctop = os.environ['SB_SRC'] 615 if srctop: 616 conf['SRCTOPS'].append(srctop) 617 objroot = os.environ['SB_OBJROOT'] 618 if objroot: 619 conf['OBJROOTS'].append(objroot) 620 except: 621 pass 622 623 debug = 0 624 output = True 625 626 opts, args = getopt.getopt(argv[1:], 'a:dS:C:O:R:m:D:H:qT:X:' + xopts) 627 for o, a in opts: 628 if o == '-a': 629 conf['MACHINE_ARCH'] = a 630 elif o == '-d': 631 debug += 1 632 elif o == '-q': 633 output = False 634 elif o == '-H': 635 conf['HOST_TARGET'] = a 636 elif o == '-S': 637 if a not in conf['SRCTOPS']: 638 conf['SRCTOPS'].append(a) 639 elif o == '-C': 640 conf['CURDIR'] = a 641 elif o == '-O': 642 if a not in conf['OBJROOTS']: 643 conf['OBJROOTS'].append(a) 644 elif o == '-R': 645 conf['RELDIR'] = a 646 elif o == '-D': 647 conf['DPDEPS'] = a 648 elif o == '-m': 649 conf['MACHINE'] = a 650 elif o == '-T': 651 conf['TARGET_SPEC'] = a 652 elif o == '-X': 653 if a not in conf['EXCLUDES']: 654 conf['EXCLUDES'].append(a) 655 elif xoptf: 656 xoptf(o, a, conf) 657 658 conf['debug'] = debug 659 660 # get any var=val assignments 661 eaten = [] 662 for a in args: 663 if a.find('=') > 0: 664 k,v = a.split('=') 665 if k in ['SRCTOP','OBJROOT','SRCTOPS','OBJROOTS']: 666 if k == 'SRCTOP': 667 k = 'SRCTOPS' 668 elif k == 'OBJROOT': 669 k = 'OBJROOTS' 670 if v not in conf[k]: 671 conf[k].append(v) 672 else: 673 conf[k] = v 674 eaten.append(a) 675 continue 676 break 677 678 for a in eaten: 679 args.remove(a) 680 681 debug_out = getv(conf, 'debug_out', sys.stderr) 682 683 if debug: 684 print("config:", file=debug_out) 685 print("psyco=", have_psyco, file=debug_out) 686 for k,v in list(conf.items()): 687 print("%s=%s" % (k,v), file=debug_out) 688 689 m = None 690 for a in args: 691 if a.endswith('.meta'): 692 if not os.path.exists(a): 693 continue 694 m = klass(a, conf) 695 elif a.startswith('@'): 696 # there can actually multiple files per line 697 for line in open(a[1:]): 698 for f in line.strip().split(): 699 if not os.path.exists(f): 700 continue 701 m = klass(f, conf) 702 703 if output and m: 704 print(m.dirdeps()) 705 706 print(m.src_dirdeps('\nsrc:')) 707 708 dpdeps = getv(conf, 'DPDEPS') 709 if dpdeps: 710 m.file_depends(open(dpdeps, 'wb')) 711 712 return m 713 714if __name__ == '__main__': 715 try: 716 main(sys.argv) 717 except: 718 # yes, this goes to stdout 719 print("ERROR: ", sys.exc_info()[1]) 720 raise 721 722