#!/usr/bin/env python

from __future__ import print_function

"""
This script parses each "meta" file and extracts the
information needed to deduce build and src dependencies.

It works much the same as the original shell script, but is
*much* more efficient.

The parsing work is handled by the class MetaFile.
We only pay attention to a subset of the information in the
"meta" files.  Specifically:

'CWD'   to initialize our notion.

'C'     to track chdir(2) on a per process basis

'R'     files read are what we really care about.
        directories read, provide a clue to resolving
        subsequent relative paths.  That is if we cannot find
        them relative to 'cwd', we check relative to the last
        dir read.

'W'     files opened for write or read-write,
        for filemon V3 and earlier.

'E'     files executed.

'L'     files linked

'V'     the filemon version, this record is used as a clue
        that we have reached the interesting bit.

"""

"""
RCSid:
        $FreeBSD$
        $Id: meta2deps.py,v 1.27 2017/05/24 00:04:04 sjg Exp $

        Copyright (c) 2011-2013, Juniper Networks, Inc.
        All rights reserved.

        Redistribution and use in source and binary forms, with or without
        modification, are permitted provided that the following conditions
        are met:
        1. Redistributions of source code must retain the above copyright
           notice, this list of conditions and the following disclaimer.
        2. Redistributions in binary form must reproduce the above copyright
           notice, this list of conditions and the following disclaimer in the
           documentation and/or other materials provided with the distribution.

        THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
        "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
        LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
        A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
        OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
        SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
        LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
        DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
        THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
        (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
        OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""

import functools
import os
import re
import sys


def getv(dict, key, d=None):
    """Lookup key in dict and return value or the supplied default."""
    if key in dict:
        return dict[key]
    return d


def resolve(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Return an absolute path, resolving via cwd or last_dir if needed.

    A trailing '/.' is dropped first.  Absolute paths, '.' and './x'
    are answered without touching the file system.  Any other relative
    path is tried against last_dir and then cwd, and the first
    candidate that exists is returned.  '..' is answered from the first
    usable directory by dropping its last component.

    Returns None when a relative path cannot be found.
    """
    if path.endswith('/.'):
        path = path[0:-2]
    if len(path) > 0 and path[0] == '/':
        return path
    if path == '.':
        return cwd
    if path.startswith('./'):
        return cwd + path[1:]
    if last_dir == cwd:
        # no point looking in the same place twice
        last_dir = None
    for d in [last_dir, cwd]:
        if not d:
            continue
        if path == '..':
            dw = d.split('/')
            p = '/'.join(dw[:-1])
            if not p:
                p = '/'
            return p
        p = '/'.join([d, path])
        if debug > 2:
            print("looking for:", p, end=' ', file=debug_out)
        if not os.path.exists(p):
            if debug > 2:
                print("nope", file=debug_out)
            continue
        if debug > 2:
            print("found:", p, file=debug_out)
        return p
    return None


def cleanpath(path):
    """cleanup path without using realpath(3)

    Empty and '.' components are dropped and each '..' removes the
    component before it.  A '..' with nothing left to remove stops
    processing; whatever was collected so far is returned.
    """
    if path.startswith('/'):
        r = '/'
    else:
        r = ''
    p = []
    for d in path.split('/'):
        if not d or d == '.':
            continue
        if d == '..':
            try:
                p.pop()
                continue
            except IndexError:
                # '..' above the top of the path: give up here
                break
        p.append(d)

    return r + '/'.join(p)


def abspath(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Return an absolute path, resolving via cwd or last_dir if needed.
    this gets called a lot, so we try to avoid calling realpath.

    If resolve() cannot find the path, the original value is kept.
    The result is passed through cleanpath() only when it has no '/',
    contains './' past the first character, or ends in '/..'.
    """
    rpath = resolve(path, cwd, last_dir, debug, debug_out)
    if rpath:
        path = rpath
    if (path.find('/') < 0 or
            path.find('./') > 0 or
            path.endswith('/..')):
        path = cleanpath(path)
    return path


def sort_unique(list, cmp=None, key=None, reverse=False):
    """Sort list in place and return a new list without adjacent duplicates.

    cmp is an optional old-style comparison function; it is converted
    with functools.cmp_to_key() because list.sort() accepts no cmp
    argument (and only keyword arguments) on Python 3.  When both cmp
    and key are given, cmp compares the values returned by key, which
    is how Python 2 combined them.
    """
    if cmp is None:
        sort_key = key
    else:
        cmp_key = functools.cmp_to_key(cmp)
        if key is None:
            sort_key = cmp_key
        else:
            def sort_key(item):
                return cmp_key(key(item))
    list.sort(key=sort_key, reverse=reverse)
    nl = []
    for e in list:
        # compare with the last element kept, so no sentinel value
        # (such as None) can be mistaken for a duplicate
        if nl and e == nl[-1]:
            continue
        nl.append(e)
    return nl


def add_trims(x):
    """Return the variations of x, with and without '/', used for trimming."""
    return ['/' + x + '/',
            '/' + x,
            x + '/',
            x]


class MetaFile:
    """class to parse meta files generated by bmake."""

    # NOTE: these are class attributes on purpose.  Instances only ever
    # append to the lists (until reset() is called), so the dependencies
    # found by every MetaFile created in a run accumulate here; main()
    # relies on that when it reports via the last instance created.
    conf = None
    dirdep_re = None
    host_target = None
    srctops = []
    objroots = []
    excludes = []
    seen = {}
    obj_deps = []
    src_deps = []
    file_deps = []

    def __init__(self, name, conf=None):
        """if name is set we will parse it now.
        conf can have the following keys:

        SRCTOPS list of tops of the src tree(s).

        CURDIR  the src directory 'bmake' was run from.

        RELDIR  the relative path from SRCTOP to CURDIR

        MACHINE the machine we built for.
                set to 'none' if we are not cross-building.
                More specifically if machine cannot be deduced from objdirs.

        MACHINE_ARCH
                recorded as self.machine_arch.

        TARGET_SPEC
                Sometimes MACHINE isn't enough.

        HOST_TARGET
                when we build for the pseudo machine 'host'
                the object tree uses HOST_TARGET rather than MACHINE.

        OBJROOTS a list of the common prefix for all obj dirs it might
                end in '/' or '-'.

        DPDEPS  names an optional file to which per file dependencies
                will be appended.
                For example if 'some/path/foo.h' is read from SRCTOP
                then 'DPDEPS_some/path/foo.h +=' "RELDIR" is output.
                This can allow 'bmake' to learn all the dirs within
                the tree that depend on 'foo.h'

        EXCLUDES
                A list of paths to ignore.
                ccache(1) can otherwise be trouble.

        debug   desired debug level

        debug_out open file to send debug output to (sys.stderr)

        """

        if conf is None:
            conf = {}

        self.name = name
        self.debug = getv(conf, 'debug', 0)
        self.debug_out = getv(conf, 'debug_out', sys.stderr)

        self.machine = getv(conf, 'MACHINE', '')
        self.machine_arch = getv(conf, 'MACHINE_ARCH', '')
        self.target_spec = getv(conf, 'TARGET_SPEC', '')
        self.curdir = getv(conf, 'CURDIR')
        self.reldir = getv(conf, 'RELDIR')
        self.dpdeps = getv(conf, 'DPDEPS')
        self.line = 0

        if not self.conf:
            # some of the steps below we want to do only once
            self.conf = conf
            self.host_target = getv(conf, 'HOST_TARGET')
            for srctop in getv(conf, 'SRCTOPS', []):
                if not srctop:
                    continue
                if srctop[-1] != '/':
                    srctop += '/'
                if srctop not in self.srctops:
                    self.srctops.append(srctop)
                    _srctop = os.path.realpath(srctop)
                    if _srctop[-1] != '/':
                        _srctop += '/'
                    if _srctop not in self.srctops:
                        self.srctops.append(_srctop)

            # only trim names that are actually set: an empty name
            # would match every objroot and trim it down to nothing.
            trim_list = []
            if self.machine:
                trim_list += add_trims(self.machine)
            if self.machine == 'host' and self.host_target:
                trim_list += add_trims(self.host_target)
            if self.target_spec:
                trim_list += add_trims(self.target_spec)

            for objroot in getv(conf, 'OBJROOTS', []):
                for e in trim_list:
                    if objroot.endswith(e):
                        # this is not what we want - fix it
                        objroot = objroot[0:-len(e)]

                if not objroot:
                    # nothing left (or nothing given)
                    continue
                if objroot[-1] != '/':
                    objroot += '/'
                if objroot not in self.objroots:
                    self.objroots.append(objroot)
                    _objroot = os.path.realpath(objroot)
                    if _objroot[-1] != '/':
                        _objroot += '/'
                    if _objroot not in self.objroots:
                        self.objroots.append(_objroot)

            # we want the longest match
            self.srctops.sort(reverse=True)
            self.objroots.sort(reverse=True)

            self.excludes = getv(conf, 'EXCLUDES', [])

            if self.debug:
                print("host_target=", self.host_target, file=self.debug_out)
                print("srctops=", self.srctops, file=self.debug_out)
                print("objroots=", self.objroots, file=self.debug_out)
                print("excludes=", self.excludes, file=self.debug_out)

            self.dirdep_re = re.compile(r'([^/]+)/(.+)')

        if self.dpdeps and not self.reldir:
            if self.debug:
                print("need reldir:", end=' ', file=self.debug_out)
            if self.curdir:
                srctop = self.find_top(self.curdir, self.srctops)
                if srctop:
                    self.reldir = self.curdir.replace(srctop, '')
                    if self.debug:
                        print(self.reldir, file=self.debug_out)
            if not self.reldir:
                self.dpdeps = None      # we cannot do it?

        self.cwd = os.getcwd()          # make sure this is initialized
        self.last_dir = self.cwd

        if name:
            self.try_parse()

    def reset(self):
        """reset state if we are being passed meta files from multiple directories."""
        self.seen = {}
        self.obj_deps = []
        self.src_deps = []
        self.file_deps = []

    def dirdeps(self, sep='\n'):
        """return DIRDEPS"""
        return sep.strip() + sep.join(self.obj_deps)

    def src_dirdeps(self, sep='\n'):
        """return SRC_DIRDEPS"""
        return sep.strip() + sep.join(self.src_deps)

    def file_depends(self, out=None):
        """Append DPDEPS_${file} += ${RELDIR}
        for each file we saw, to the output file.

        Nothing is written (and None is returned) when reldir is unset.
        """
        if not self.reldir:
            return None
        for f in sort_unique(self.file_deps):
            print('DPDEPS_%s += %s' % (f, self.reldir), file=out)
        # these entries provide for reverse DIRDEPS lookup
        for f in self.obj_deps:
            print('DEPDIRS_%s += %s' % (f, self.reldir), file=out)

    def seenit(self, dir):
        """remember that we have seen dir."""
        self.seen[dir] = 1

    def add(self, list, data, clue=''):
        """add data to list if it isn't already there."""
        if data not in list:
            list.append(data)
            if self.debug:
                print("%s: %sAdd: %s" % (self.name, clue, data), file=self.debug_out)

    def find_top(self, path, list):
        """the logical tree may be split across multiple trees

        Return the first entry of list that path starts with, or None.
        """
        for top in list:
            if path.startswith(top):
                if self.debug > 2:
                    print("found in", top, file=self.debug_out)
                return top
        return None

    def find_obj(self, objroot, dir, path, input):
        """return path within objroot, taking care of .dirdep files

        The first of 'path.dirdep' and 'dir/.dirdep' that exists and
        has a non-empty first line supplies the answer, with a trailing
        machine (or target_spec) and its separator removed.  Otherwise
        input and dir are marked as seen and the answer is derived from
        dir itself.
        """
        ddep = None
        for ddepf in [path + '.dirdep', dir + '/.dirdep']:
            if not ddep and os.path.exists(ddepf):
                with open(ddepf, 'r') as dirdep_file:
                    ddep = dirdep_file.readline().strip('# \n')
                if self.debug > 1:
                    print("found %s: %s\n" % (ddepf, ddep), file=self.debug_out)
                # an empty machine would match anything and cost us
                # the last character of ddep
                if self.machine and ddep.endswith(self.machine):
                    ddep = ddep[0:-(1+len(self.machine))]
                elif self.target_spec and ddep.endswith(self.target_spec):
                    ddep = ddep[0:-(1+len(self.target_spec))]

        if not ddep:
            # no .dirdeps, so remember that we've seen the raw input
            self.seenit(input)
            self.seenit(dir)
            if self.machine == 'none':
                if dir.startswith(objroot):
                    return dir.replace(objroot, '')
                return None
            m = self.dirdep_re.match(dir.replace(objroot, ''))
            if m:
                ddep = m.group(2)
                dmachine = m.group(1)
                if dmachine != self.machine:
                    if not (self.machine == 'host' and
                            dmachine == self.host_target):
                        if self.debug > 2:
                            print("adding .%s to %s" % (dmachine, ddep), file=self.debug_out)
                        ddep += '.' + dmachine

        return ddep

    def try_parse(self, name=None, file=None):
        """give file and line number causing exception"""
        try:
            self.parse(name, file)
        except:
            # give a useful clue
            print('{}:{}: '.format(self.name, self.line), end=' ', file=sys.stderr)
            raise

    def parse(self, name=None, file=None):
        """A meta file looks like:

        # Meta data file "path"
        CMD "command-line"
        CWD "cwd"
        TARGET "target"
        -- command output --
        -- filemon acquired metadata --
        # buildmon version 3
        V 3
        C "pid" "cwd"
        E "pid" "path"
        F "pid" "child"
        R "pid" "path"
        W "pid" "path"
        X "pid" "status"
        D "pid" "path"
        L "pid" "src" "target"
        M "pid" "old" "new"
        S "pid" "path"
        # Bye bye

        We go to some effort to avoid processing a dependency more than once.
        Of the above record types only C,E,F,L,R,V and W are of interest.

        name, if given, replaces self.name.  file, if given, is an
        iterable of lines used instead of opening self.name; it is not
        closed here.
        """

        version = 0                     # unknown
        if name:
            self.name = name
        cwd = self.cwd
        if file:
            f = file
            self.last_dir = self.cwd
        else:
            f = open(self.name, 'r')
        skip = True
        pid_cwd = {}
        pid_last_dir = {}
        last_pid = 0

        self.line = 0
        if self.curdir:
            self.seenit(self.curdir)    # we ignore this

        # 'W' is not listed: write records are currently ignored,
        # whatever the filemon version.
        interesting = 'CEFLRV'
        try:
            for line in f:
                self.line += 1
                # ignore anything we don't care about
                if not line[0] in interesting:
                    continue
                if self.debug > 2:
                    print("input:", line, end=' ', file=self.debug_out)
                w = line.split()

                if skip:
                    # nothing is processed until the 'V' record
                    if w[0] == 'V':
                        skip = False
                        version = int(w[1])
                    elif w[0] == 'CWD':
                        self.cwd = cwd = self.last_dir = w[1]
                        self.seenit(cwd)    # ignore this
                        if self.debug:
                            print("%s: CWD=%s" % (self.name, cwd), file=self.debug_out)
                    continue

                pid = int(w[1])
                if pid != last_pid:
                    # switch to the cwd and last_dir of this process
                    if last_pid:
                        pid_last_dir[last_pid] = self.last_dir
                    cwd = getv(pid_cwd, pid, self.cwd)
                    self.last_dir = getv(pid_last_dir, pid, self.cwd)
                    last_pid = pid

                # process operations
                if w[0] == 'F':
                    # the child starts out where its parent is
                    npid = int(w[2])
                    pid_cwd[npid] = cwd
                    pid_last_dir[npid] = cwd
                    last_pid = npid
                    continue
                elif w[0] == 'C':
                    cwd = abspath(w[2], cwd, None, self.debug, self.debug_out)
                    if cwd.endswith('/.'):
                        cwd = cwd[0:-2]
                    self.last_dir = pid_last_dir[pid] = cwd
                    pid_cwd[pid] = cwd
                    if self.debug > 1:
                        print("cwd=", cwd, file=self.debug_out)
                    continue

                if w[2] in self.seen:
                    if self.debug > 2:
                        print("seen:", w[2], file=self.debug_out)
                    continue
                # file operations
                if w[0] in 'ML':
                    # these are special, treat src as read and
                    # target as write.
                    # w[1] is the pid, so src is w[2] and target w[3].
                    self.parse_path(w[2].strip("'"), cwd, 'R', w)
                    if len(w) > 3:
                        self.parse_path(w[3].strip("'"), cwd, 'W', w)
                    continue
                elif w[0] in 'ERWS':
                    path = w[2]
                    self.parse_path(path, cwd, w[0], w)
        finally:
            # only close what we opened, even if parsing blew up
            if not file:
                f.close()

    def is_src(self, base, dir, rdir):
        """is base in srctop

        dir and then rdir (when set) are tried.  On the first match the
        directory is added to src_deps (and the file to file_deps when
        dpdeps is set), the directory is marked seen and True is
        returned.
        """
        for dir in [dir, rdir]:
            if not dir:
                continue
            path = '/'.join([dir, base])
            srctop = self.find_top(path, self.srctops)
            if srctop:
                if self.dpdeps:
                    self.add(self.file_deps, path.replace(srctop, ''), 'file')
                self.add(self.src_deps, dir.replace(srctop, ''), 'src')
                self.seenit(dir)
                return True
        return False

    def parse_path(self, path, cwd, op=None, w=None):
        """look at a path for the op specified

        w is the split record the path came from; w[0] supplies op when
        op is not given and w[2] is the raw path that gets marked as
        seen.  Without a usable w, path itself serves as the raw path.
        """
        if w is None:
            w = []
        if not op:
            op = w[0]
        if len(w) > 2:
            raw = w[2]
        else:
            raw = path

        # we are never interested in .dirdep files as dependencies
        if path.endswith('.dirdep'):
            return
        for p in self.excludes:
            if p and path.startswith(p):
                if self.debug > 2:
                    print("exclude:", p, path, file=self.debug_out)
                return
        # we don't want to resolve the last component if it is
        # a symlink
        path = resolve(path, cwd, self.last_dir, self.debug, self.debug_out)
        if not path:
            return
        dir, base = os.path.split(path)
        if dir in self.seen:
            if self.debug > 2:
                print("seen:", dir, file=self.debug_out)
            return
        # we can have a path in an objdir which is a link
        # to the src dir, we may need to add dependencies for each
        dir = abspath(dir, cwd, self.last_dir, self.debug, self.debug_out)
        rdir = os.path.realpath(dir)
        if rdir == dir:
            rdir = None
        # now put path back together
        path = '/'.join([dir, base])
        if self.debug > 1:
            print("raw=%s rdir=%s dir=%s path=%s" % (raw, rdir, dir, path), file=self.debug_out)
        if op in 'RWS':
            if path in [self.last_dir, cwd, self.cwd, self.curdir]:
                if self.debug > 1:
                    print("skipping:", path, file=self.debug_out)
                return
            if os.path.isdir(path):
                # a directory is only a clue for later relative paths
                if op in 'RW':
                    self.last_dir = path
                if self.debug > 1:
                    print("ldir=", self.last_dir, file=self.debug_out)
                return

        if op in 'ERW':
            # finally, we get down to it
            if dir == self.cwd or dir == self.curdir:
                return
            if self.is_src(base, dir, rdir):
                self.seenit(raw)
                if not rdir:
                    return

            objroot = None
            # NOTE: this loop deliberately rebinds dir; the code below
            # uses whichever value the loop finished on.
            for dir in [dir, rdir]:
                if not dir:
                    continue
                objroot = self.find_top(dir, self.objroots)
                if objroot:
                    break
            if objroot:
                ddep = self.find_obj(objroot, dir, path, raw)
                if ddep:
                    self.add(self.obj_deps, ddep, 'obj')
                    if self.dpdeps and objroot.endswith('/stage/'):
                        sp = '/'.join(path.replace(objroot, '').split('/')[1:])
                        self.add(self.file_deps, sp, 'file')
            else:
                # don't waste time looking again
                self.seenit(raw)
                self.seenit(dir)


def main(argv, klass=MetaFile, xopts='', xoptf=None):
    """Simple driver for class MetaFile.

    Usage:
        script [options] [key=value ...] "meta" ...

    Options and key=value pairs contribute to the
    dictionary passed to MetaFile.

    -S "SRCTOP"
                add "SRCTOP" to the "SRCTOPS" list.

    -C "CURDIR"

    -O "OBJROOT"
                add "OBJROOT" to the "OBJROOTS" list.

    -R "RELDIR"

    -m "MACHINE"

    -a "MACHINE_ARCH"

    -H "HOST_TARGET"

    -T "TARGET_SPEC"

    -D "DPDEPS"

    -X "EXCLUDE"
                add "EXCLUDE" to the "EXCLUDES" list.

    -q          do not print the results

    -d          bumps debug level

    xopts is appended to the getopt option string and any option not
    listed above is handed to xoptf(option, value, conf) if set.

    An argument of the form '@file' names a file listing meta files,
    and only arguments ending in '.meta' are otherwise parsed.

    Returns the last klass instance created, or None.
    """
    import getopt

    # import Psyco if we can
    # it can speed things up quite a bit
    have_psyco = 0
    try:
        import psyco
        psyco.full()
        have_psyco = 1
    except Exception:
        pass

    conf = {
        'SRCTOPS': [],
        'OBJROOTS': [],
        'EXCLUDES': [],
    }

    # NOTE: the first variable missing from the environment ends
    # these lookups; the ones after it are not consulted.
    try:
        machine = os.environ['MACHINE']
        if machine:
            conf['MACHINE'] = machine
        machine_arch = os.environ['MACHINE_ARCH']
        if machine_arch:
            conf['MACHINE_ARCH'] = machine_arch
        srctop = os.environ['SB_SRC']
        if srctop:
            conf['SRCTOPS'].append(srctop)
        objroot = os.environ['SB_OBJROOT']
        if objroot:
            conf['OBJROOTS'].append(objroot)
    except KeyError:
        pass

    debug = 0
    output = True

    opts, args = getopt.getopt(argv[1:], 'a:dS:C:O:R:m:D:H:qT:X:' + xopts)
    for o, a in opts:
        if o == '-a':
            conf['MACHINE_ARCH'] = a
        elif o == '-d':
            debug += 1
        elif o == '-q':
            output = False
        elif o == '-H':
            conf['HOST_TARGET'] = a
        elif o == '-S':
            if a not in conf['SRCTOPS']:
                conf['SRCTOPS'].append(a)
        elif o == '-C':
            conf['CURDIR'] = a
        elif o == '-O':
            if a not in conf['OBJROOTS']:
                conf['OBJROOTS'].append(a)
        elif o == '-R':
            conf['RELDIR'] = a
        elif o == '-D':
            conf['DPDEPS'] = a
        elif o == '-m':
            conf['MACHINE'] = a
        elif o == '-T':
            conf['TARGET_SPEC'] = a
        elif o == '-X':
            if a not in conf['EXCLUDES']:
                conf['EXCLUDES'].append(a)
        elif xoptf:
            xoptf(o, a, conf)

    conf['debug'] = debug

    # get any var=val assignments
    eaten = 0
    for a in args:
        if a.find('=') <= 0:
            break
        # split on the first '=' only, the value may contain more
        k, v = a.split('=', 1)
        if k in ['SRCTOP', 'OBJROOT', 'SRCTOPS', 'OBJROOTS']:
            if k == 'SRCTOP':
                k = 'SRCTOPS'
            elif k == 'OBJROOT':
                k = 'OBJROOTS'
            if v not in conf[k]:
                conf[k].append(v)
        else:
            conf[k] = v
        eaten += 1

    # the assignments are always the leading arguments
    args = args[eaten:]

    debug_out = getv(conf, 'debug_out', sys.stderr)

    if debug:
        print("config:", file=debug_out)
        print("psyco=", have_psyco, file=debug_out)
        for k, v in list(conf.items()):
            print("%s=%s" % (k, v), file=debug_out)

    m = None
    for a in args:
        if a.endswith('.meta'):
            if not os.path.exists(a):
                continue
            m = klass(a, conf)
        elif a.startswith('@'):
            # there can actually multiple files per line
            with open(a[1:]) as listing:
                for line in listing:
                    for f in line.strip().split():
                        if not os.path.exists(f):
                            continue
                        m = klass(f, conf)

    if output and m:
        print(m.dirdeps())

        print(m.src_dirdeps('\nsrc:'))

        dpdeps = getv(conf, 'DPDEPS')
        if dpdeps:
            # text mode: file_depends() writes str via print()
            with open(dpdeps, 'w') as dpdeps_out:
                m.file_depends(dpdeps_out)

    return m


if __name__ == '__main__':
    try:
        main(sys.argv)
    except:
        # yes, this goes to stdout
        print("ERROR: ", sys.exc_info()[1])
        raise