#!/usr/bin/env python

from __future__ import print_function

"""
This script parses each "meta" file and extracts the
information needed to deduce build and src dependencies.

It works much the same as the original shell script, but is
*much* more efficient.

The parsing work is handled by the class MetaFile.
We only pay attention to a subset of the information in the
"meta" files.  Specifically:

'CWD'   to initialize our notion.

'C'     to track chdir(2) on a per process basis

'R'     files read are what we really care about.
        directories read, provide a clue to resolving
        subsequent relative paths.  That is if we cannot find
        them relative to 'cwd', we check relative to the last
        dir read.

'W'     files opened for write or read-write,
        for filemon V3 and earlier.

'E'     files executed.

'L'     files linked

'V'     the filemon version, this record is used as a clue
        that we have reached the interesting bit.

"""

"""
RCSid:
        $FreeBSD$
        $Id: meta2deps.py,v 1.24 2017/02/08 22:17:10 sjg Exp $

        Copyright (c) 2011-2013, Juniper Networks, Inc.
        All rights reserved.

        Redistribution and use in source and binary forms, with or without
        modification, are permitted provided that the following conditions
        are met:
        1. Redistributions of source code must retain the above copyright
           notice, this list of conditions and the following disclaimer.
        2. Redistributions in binary form must reproduce the above copyright
           notice, this list of conditions and the following disclaimer in the
           documentation and/or other materials provided with the distribution.

        THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
        "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
        LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
        A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
        OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
        SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
        LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
        DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
        THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
        (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
        OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""

import os, re, sys


def getv(dict, key, d=None):
    """Lookup key in dict and return value or the supplied default."""
    if key in dict:
        return dict[key]
    return d


def resolve(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Return an absolute path, resolving via cwd or last_dir if needed.

    Absolute paths are returned as is (minus a trailing '/.').
    Relative paths are tried against last_dir and then cwd; the first
    candidate that exists on disk is returned.  Returns None when
    neither candidate exists.
    """
    if path.endswith('/.'):
        path = path[0:-2]
    if len(path) > 0 and path[0] == '/':
        return path
    if path == '.':
        return cwd
    if path.startswith('./'):
        return cwd + path[1:]
    if last_dir == cwd:
        # no point looking in the same place twice
        last_dir = None
    for d in [last_dir, cwd]:
        if not d:
            continue
        p = '/'.join([d, path])
        if debug > 2:
            print("looking for:", p, end=' ', file=debug_out)
        if not os.path.exists(p):
            if debug > 2:
                print("nope", file=debug_out)
            continue
        if debug > 2:
            print("found:", p, file=debug_out)
        return p
    return None


def cleanpath(path):
    """cleanup path without using realpath(3)

    Collapses empty components, '.' and 'dir/..' purely textually.
    A '..' that would climb above the root of an absolute path is
    dropped; a leading '..' of a relative path is kept, since there
    is nothing to cancel it against.
    """
    if path.startswith('/'):
        r = '/'
    else:
        r = ''
    p = []
    for d in path.split('/'):
        if not d or d == '.':
            continue
        if d == '..':
            if p and p[-1] != '..':
                p.pop()
            elif not r:
                # relative path climbing above its start: keep the '..'
                p.append(d)
            # else: '/..' is still '/'
            continue
        p.append(d)

    return r + '/'.join(p)


def abspath(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Return an absolute path, resolving via cwd or last_dir if needed.
    this gets called a lot, so we try to avoid calling realpath.

    If resolve() cannot find the path it is used as given; the result
    is only passed through cleanpath() when it looks like it needs it.
    """
    rpath = resolve(path, cwd, last_dir, debug, debug_out)
    if rpath:
        path = rpath
    if (path.find('/') < 0 or
        path.find('./') > 0 or
        path.endswith('/..')):
        path = cleanpath(path)
    return path


def sort_unique(list, cmp=None, key=None, reverse=False):
    """Sort list in place and return a new list without duplicates.

    cmp, key and reverse have the meaning they had for the Python 2
    list.sort(); cmp is adapted via functools.cmp_to_key so that this
    also works with Python 3, where sort() only takes keyword
    arguments and has no cmp.
    """
    sort_key = key
    if cmp is not None:
        from functools import cmp_to_key
        wrap = cmp_to_key(cmp)
        if key is None:
            sort_key = wrap
        else:
            # Python 2 applied cmp to the extracted keys
            sort_key = lambda e: wrap(key(e))
    list.sort(key=sort_key, reverse=reverse)
    nl = []
    le = None
    for e in list:
        if e == le:
            continue
        le = e
        nl.append(e)
    return nl


def add_trims(x):
    """Return the variants of x (with and without surrounding '/')
    that may need trimming from the end of an objroot."""
    return ['/' + x + '/',
            '/' + x,
            x + '/',
            x]


class MetaFile:
    """class to parse meta files generated by bmake."""

    # NOTE: these are deliberately class attributes, the mutable ones
    # are shared by all instances so that dependencies accumulate
    # across the meta files handed to successive instances
    # (see main() and reset()).
    conf = None
    dirdep_re = None
    host_target = None
    srctops = []
    objroots = []
    excludes = []
    seen = {}
    obj_deps = []
    src_deps = []
    file_deps = []

    def __init__(self, name, conf=None):
        """if name is set we will parse it now.
        conf can have the following keys:

        SRCTOPS list of tops of the src tree(s).

        CURDIR  the src directory 'bmake' was run from.

        RELDIR  the relative path from SRCTOP to CURDIR

        MACHINE the machine we built for.
                set to 'none' if we are not cross-building.
                More specifically if machine cannot be deduced from objdirs.

        TARGET_SPEC
                Sometimes MACHINE isn't enough.

        HOST_TARGET
                when we build for the pseudo machine 'host'
                the object tree uses HOST_TARGET rather than MACHINE.

        OBJROOTS a list of the common prefix for all obj dirs it might
                end in '/' or '-'.

        DPDEPS  names an optional file to which per file dependencies
                will be appended.
                For example if 'some/path/foo.h' is read from SRCTOP
                then 'DPDEPS_some/path/foo.h +=' "RELDIR" is output.
                This can allow 'bmake' to learn all the dirs within
                the tree that depend on 'foo.h'

        EXCLUDES
                A list of paths to ignore.
                ccache(1) can otherwise be trouble.

        debug   desired debug level

        debug_out open file to send debug output to (sys.stderr)

        """

        if conf is None:
            conf = {}

        self.name = name
        self.debug = getv(conf, 'debug', 0)
        self.debug_out = getv(conf, 'debug_out', sys.stderr)

        self.machine = getv(conf, 'MACHINE', '')
        self.machine_arch = getv(conf, 'MACHINE_ARCH', '')
        self.target_spec = getv(conf, 'TARGET_SPEC', '')
        self.curdir = getv(conf, 'CURDIR')
        self.reldir = getv(conf, 'RELDIR')
        self.dpdeps = getv(conf, 'DPDEPS')
        self.line = 0

        if not self.conf:
            # some of the steps below we want to do only once
            self.conf = conf
            self.host_target = getv(conf, 'HOST_TARGET')
            for srctop in getv(conf, 'SRCTOPS', []):
                if not srctop:
                    continue
                if srctop[-1] != '/':
                    srctop += '/'
                if not srctop in self.srctops:
                    self.srctops.append(srctop)
                _srctop = os.path.realpath(srctop)
                if _srctop[-1] != '/':
                    _srctop += '/'
                if not _srctop in self.srctops:
                    self.srctops.append(_srctop)

            # Only trim by values that are actually set: an empty
            # MACHINE would otherwise trim the whole objroot away,
            # and a missing HOST_TARGET cannot be concatenated.
            trim_list = []
            if self.machine:
                trim_list += add_trims(self.machine)
            if self.machine == 'host' and self.host_target:
                trim_list += add_trims(self.host_target)
            if self.target_spec:
                trim_list += add_trims(self.target_spec)

            for objroot in getv(conf, 'OBJROOTS', []):
                for e in trim_list:
                    if objroot.endswith(e):
                        # this is not what we want - fix it
                        objroot = objroot[0:-len(e)]

                if not objroot:
                    # nothing left to match against
                    continue
                if objroot[-1] != '/':
                    objroot += '/'
                if not objroot in self.objroots:
                    self.objroots.append(objroot)
                    _objroot = os.path.realpath(objroot)
                    # realpath strips the trailing '/' (except for '/')
                    if _objroot[-1] != '/':
                        _objroot += '/'
                    if not _objroot in self.objroots:
                        self.objroots.append(_objroot)

            # we want the longest match
            self.srctops.sort(reverse=True)
            self.objroots.sort(reverse=True)

            self.excludes = getv(conf, 'EXCLUDES', [])

            if self.debug:
                print("host_target=", self.host_target, file=self.debug_out)
                print("srctops=", self.srctops, file=self.debug_out)
                print("objroots=", self.objroots, file=self.debug_out)
                print("excludes=", self.excludes, file=self.debug_out)

            self.dirdep_re = re.compile(r'([^/]+)/(.+)')

        if self.dpdeps and not self.reldir:
            if self.debug:
                print("need reldir:", end=' ', file=self.debug_out)
            if self.curdir:
                srctop = self.find_top(self.curdir, self.srctops)
                if srctop:
                    self.reldir = self.curdir.replace(srctop,'')
                    if self.debug:
                        print(self.reldir, file=self.debug_out)
            if not self.reldir:
                self.dpdeps = None      # we cannot do it?

        self.cwd = os.getcwd()          # make sure this is initialized
        self.last_dir = self.cwd

        if name:
            self.try_parse()

    def reset(self):
        """reset state if we are being passed meta files from multiple directories."""
        self.seen = {}
        self.obj_deps = []
        self.src_deps = []
        self.file_deps = []

    def dirdeps(self, sep='\n'):
        """return DIRDEPS"""
        return sep.strip() + sep.join(self.obj_deps)

    def src_dirdeps(self, sep='\n'):
        """return SRC_DIRDEPS"""
        return sep.strip() + sep.join(self.src_deps)

    def file_depends(self, out=None):
        """Append DPDEPS_${file} += ${RELDIR}
        for each file we saw, to the output file.

        Does nothing if RELDIR is not known.
        out=None means standard output."""
        if not self.reldir:
            return None
        for f in sort_unique(self.file_deps):
            print('DPDEPS_%s += %s' % (f, self.reldir), file=out)
        # these entries provide for reverse DIRDEPS lookup
        for f in self.obj_deps:
            print('DEPDIRS_%s += %s' % (f, self.reldir), file=out)

    def seenit(self, dir):
        """remember that we have seen dir."""
        self.seen[dir] = 1

    def add(self, list, data, clue=''):
        """add data to list if it isn't already there."""
        if data not in list:
            list.append(data)
            if self.debug:
                print("%s: %sAdd: %s" % (self.name, clue, data), file=self.debug_out)

    def find_top(self, path, list):
        """the logical tree may be split across multiple trees

        Return the first entry of list that path starts with, or None."""
        for top in list:
            if path.startswith(top):
                if self.debug > 2:
                    print("found in", top, file=self.debug_out)
                return top
        return None

    def find_obj(self, objroot, dir, path, input):
        """return path within objroot, taking care of .dirdep files"""
        ddep = None
        for ddepf in [path + '.dirdep', dir + '/.dirdep']:
            if not ddep and os.path.exists(ddepf):
                with open(ddepf, 'r') as f:
                    ddep = f.readline().strip('# \n')
                if self.debug > 1:
                    print("found %s: %s\n" % (ddepf, ddep), file=self.debug_out)
                # strip a trailing '.MACHINE' (or '.TARGET_SPEC');
                # an unset value must not match, or we would chop
                # the last character off every dirdep.
                for suffix in (self.machine, self.target_spec):
                    if suffix and ddep.endswith(suffix):
                        ddep = ddep[0:-(1+len(suffix))]
                        break

        if not ddep:
            # no .dirdeps, so remember that we've seen the raw input
            self.seenit(input)
            self.seenit(dir)
            if self.machine == 'none':
                if dir.startswith(objroot):
                    return dir.replace(objroot,'')
                return None
            # the first component below objroot is the machine
            m = self.dirdep_re.match(dir.replace(objroot,''))
            if m:
                ddep = m.group(2)
                dmachine = m.group(1)
                if dmachine != self.machine:
                    if not (self.machine == 'host' and
                            dmachine == self.host_target):
                        if self.debug > 2:
                            print("adding .%s to %s" % (dmachine, ddep), file=self.debug_out)
                        ddep += '.' + dmachine

        return ddep

    def try_parse(self, name=None, file=None):
        """give file and line number causing exception"""
        try:
            self.parse(name, file)
        except:
            # give a useful clue
            print('{}:{}: '.format(self.name, self.line), end=' ', file=sys.stderr)
            raise

    def parse(self, name=None, file=None):
        """A meta file looks like:

        # Meta data file "path"
        CMD "command-line"
        CWD "cwd"
        TARGET "target"
        -- command output --
        -- filemon acquired metadata --
        # buildmon version 3
        V 3
        C "pid" "cwd"
        E "pid" "path"
        F "pid" "child"
        R "pid" "path"
        W "pid" "path"
        X "pid" "status"
        D "pid" "path"
        L "pid" "src" "target"
        M "pid" "old" "new"
        S "pid" "path"
        # Bye bye

        We go to some effort to avoid processing a dependency more than once.
        Of the above record types only C,E,F,L,R,V and W are of interest.

        name, if given, replaces self.name.
        file, if given, is an already open file to read instead of
        opening self.name; it is left open for the caller.
        """

        version = 0                     # unknown
        if name:
            self.name = name
        if file:
            f = file
            cwd = self.last_dir = self.cwd
        else:
            f = open(self.name, 'r')
        skip = True
        pid_cwd = {}
        pid_last_dir = {}
        last_pid = 0

        self.line = 0
        if self.curdir:
            self.seenit(self.curdir)    # we ignore this

        interesting = 'CEFLRV'
        try:
            for line in f:
                self.line += 1
                # ignore anything we don't care about
                if not line[0] in interesting:
                    continue
                if self.debug > 2:
                    print("input:", line, end=' ', file=self.debug_out)
                w = line.split()

                if skip:
                    if w[0] == 'V':
                        skip = False
                        version = int(w[1])
                        # disabled:
                        # if version < 4:
                        #     # we cannot ignore 'W' records
                        #     # as they may be 'rw'
                        #     interesting += 'W'
                    elif w[0] == 'CWD':
                        self.cwd = cwd = self.last_dir = w[1]
                        self.seenit(cwd)    # ignore this
                        if self.debug:
                            print("%s: CWD=%s" % (self.name, cwd), file=self.debug_out)
                    continue

                pid = int(w[1])
                if pid != last_pid:
                    # switch to the state of the process we now look at
                    if last_pid:
                        pid_last_dir[last_pid] = self.last_dir
                    cwd = getv(pid_cwd, pid, self.cwd)
                    self.last_dir = getv(pid_last_dir, pid, self.cwd)
                    last_pid = pid

                # process operations
                if w[0] == 'F':
                    # child inherits the cwd of its parent
                    npid = int(w[2])
                    pid_cwd[npid] = cwd
                    pid_last_dir[npid] = cwd
                    last_pid = npid
                    continue
                elif w[0] == 'C':
                    cwd = abspath(w[2], cwd, None, self.debug, self.debug_out)
                    if cwd.endswith('/.'):
                        cwd = cwd[0:-2]
                    self.last_dir = pid_last_dir[pid] = cwd
                    pid_cwd[pid] = cwd
                    if self.debug > 1:
                        print("cwd=", cwd, file=self.debug_out)
                    continue

                if w[2] in self.seen:
                    if self.debug > 2:
                        print("seen:", w[2], file=self.debug_out)
                    continue
                # file operations
                if w[0] in 'ML':
                    # these are special, treat src as read and
                    # target as write.
                    # w[1] is the pid, so src and target are w[2] and w[3]
                    self.parse_path(w[2].strip("'"), cwd, 'R', w)
                    self.parse_path(w[3].strip("'"), cwd, 'W', w)
                    continue
                elif w[0] in 'ERWS':
                    path = w[2]
                    self.parse_path(path, cwd, w[0], w)
        finally:
            # only close what we opened, even if parsing blew up
            if not file:
                f.close()

    def parse_path(self, path, cwd, op=None, w=[]):
        """look at a path for the op specified

        path is the path from the record, cwd the current directory
        of the process that issued it, op the record type to treat it
        as (defaults to w[0]) and w the split record itself.
        """

        if not op:
            op = w[0]

        # we are never interested in .dirdep files as dependencies
        if path.endswith('.dirdep'):
            return
        for p in self.excludes:
            if p and path.startswith(p):
                if self.debug > 2:
                    print("exclude:", p, path, file=self.debug_out)
                return
        # we don't want to resolve the last component if it is
        # a symlink
        path = resolve(path, cwd, self.last_dir, self.debug, self.debug_out)
        if not path:
            return
        dir,base = os.path.split(path)
        if dir in self.seen:
            if self.debug > 2:
                print("seen:", dir, file=self.debug_out)
            return
        # we can have a path in an objdir which is a link
        # to the src dir, we may need to add dependencies for each
        rdir = dir
        dir = abspath(dir, cwd, self.last_dir, self.debug, self.debug_out)
        if rdir == dir or rdir.find('./') > 0:
            rdir = None
        if os.path.islink(dir):
            rdir = os.path.realpath(dir)
        # now put path back together
        path = '/'.join([dir,base])
        if self.debug > 1:
            print("raw=%s rdir=%s dir=%s path=%s" % (w[2], rdir, dir, path), file=self.debug_out)
        if op in 'RWS':
            if path in [self.last_dir, cwd, self.cwd, self.curdir]:
                if self.debug > 1:
                    print("skipping:", path, file=self.debug_out)
                return
            if os.path.isdir(path):
                # a directory is only a clue for later relative paths
                if op in 'RW':
                    self.last_dir = path
                if self.debug > 1:
                    print("ldir=", self.last_dir, file=self.debug_out)
                return

        if op in 'ERW':
            # finally, we get down to it
            if dir == self.cwd or dir == self.curdir:
                return
            srctop = self.find_top(path, self.srctops)
            if srctop:
                if self.dpdeps:
                    self.add(self.file_deps, path.replace(srctop,''), 'file')
                self.add(self.src_deps, dir.replace(srctop,''), 'src')
                self.seenit(w[2])
                self.seenit(dir)
                if rdir and not rdir.startswith(srctop):
                    dir = rdir      # for below
                    rdir = None
                else:
                    return

            objroot = None
            for dir in [dir,rdir]:
                if not dir:
                    continue
                objroot = self.find_top(dir, self.objroots)
                if objroot:
                    break
            if objroot:
                ddep = self.find_obj(objroot, dir, path, w[2])
                if ddep:
                    self.add(self.obj_deps, ddep, 'obj')
                    if self.dpdeps and objroot.endswith('/stage/'):
                        sp = '/'.join(path.replace(objroot,'').split('/')[1:])
                        self.add(self.file_deps, sp, 'file')
            else:
                # don't waste time looking again
                self.seenit(w[2])
                self.seenit(dir)


def main(argv, klass=MetaFile, xopts='', xoptf=None):
    """Simple driver for class MetaFile.

    Usage:
        script [options] [key=value ...] "meta" ...

    Options and key=value pairs contribute to the
    dictionary passed to MetaFile.

    -S "SRCTOP"
                add "SRCTOP" to the "SRCTOPS" list.

    -C "CURDIR"

    -O "OBJROOT"
                add "OBJROOT" to the "OBJROOTS" list.

    -m "MACHINE"

    -a "MACHINE_ARCH"

    -H "HOST_TARGET"

    -D "DPDEPS"

    -d  bumps debug level

    klass is the class to instantiate for each meta file,
    xopts are extra getopt(3) option letters which, when seen,
    are passed to xoptf(option, argument, conf).

    Returns the last klass instance created (None if there was none).
    """
    import getopt

    # import Psyco if we can
    # it can speed things up quite a bit
    have_psyco = 0
    try:
        import psyco
        psyco.full()
        have_psyco = 1
    except Exception:
        # purely an optimization, carry on without it
        pass

    conf = {
        'SRCTOPS': [],
        'OBJROOTS': [],
        'EXCLUDES': [],
        }

    # each of these is optional and independent of the others
    machine = os.environ.get('MACHINE')
    if machine:
        conf['MACHINE'] = machine
    machine_arch = os.environ.get('MACHINE_ARCH')
    if machine_arch:
        conf['MACHINE_ARCH'] = machine_arch
    srctop = os.environ.get('SB_SRC')
    if srctop:
        conf['SRCTOPS'].append(srctop)
    objroot = os.environ.get('SB_OBJROOT')
    if objroot:
        conf['OBJROOTS'].append(objroot)

    debug = 0
    output = True

    opts, args = getopt.getopt(argv[1:], 'a:dS:C:O:R:m:D:H:qT:X:' + xopts)
    for o, a in opts:
        if o == '-a':
            conf['MACHINE_ARCH'] = a
        elif o == '-d':
            debug += 1
        elif o == '-q':
            output = False
        elif o == '-H':
            conf['HOST_TARGET'] = a
        elif o == '-S':
            if a not in conf['SRCTOPS']:
                conf['SRCTOPS'].append(a)
        elif o == '-C':
            conf['CURDIR'] = a
        elif o == '-O':
            if a not in conf['OBJROOTS']:
                conf['OBJROOTS'].append(a)
        elif o == '-R':
            conf['RELDIR'] = a
        elif o == '-D':
            conf['DPDEPS'] = a
        elif o == '-m':
            conf['MACHINE'] = a
        elif o == '-T':
            conf['TARGET_SPEC'] = a
        elif o == '-X':
            if a not in conf['EXCLUDES']:
                conf['EXCLUDES'].append(a)
        elif xoptf:
            xoptf(o, a, conf)

    conf['debug'] = debug

    # get any var=val assignments, they must precede the meta files
    eaten = 0
    for a in args:
        if a.find('=') <= 0:
            break
        # the value may itself contain '='
        k, v = a.split('=', 1)
        if k in ['SRCTOP','OBJROOT','SRCTOPS','OBJROOTS']:
            if k == 'SRCTOP':
                k = 'SRCTOPS'
            elif k == 'OBJROOT':
                k = 'OBJROOTS'
            if v not in conf[k]:
                conf[k].append(v)
        else:
            conf[k] = v
        eaten += 1

    del args[:eaten]

    debug_out = getv(conf, 'debug_out', sys.stderr)

    if debug:
        print("config:", file=debug_out)
        print("psyco=", have_psyco, file=debug_out)
        for k,v in list(conf.items()):
            print("%s=%s" % (k,v), file=debug_out)

    m = None
    for a in args:
        if a.endswith('.meta'):
            if not os.path.exists(a):
                continue
            m = klass(a, conf)
        elif a.startswith('@'):
            # there can actually multiple files per line
            with open(a[1:]) as flist:
                for line in flist:
                    for f in line.strip().split():
                        if not os.path.exists(f):
                            continue
                        m = klass(f, conf)

    if output and m:
        print(m.dirdeps())

        print(m.src_dirdeps('\nsrc:'))

        dpdeps = getv(conf, 'DPDEPS')
        if dpdeps:
            # text mode: print() cannot write str to a binary file
            with open(dpdeps, 'w') as out:
                m.file_depends(out)

    return m


if __name__ == '__main__':
    try:
        main(sys.argv)
    except:
        # yes, this goes to stdout
        print("ERROR: ", sys.exc_info()[1])
        raise