# $FreeBSD$
#!/usr/bin/env python

from __future__ import print_function

"""
This script parses each "meta" file and extracts the
information needed to deduce build and src dependencies.

It works much the same as the original shell script, but is
*much* more efficient.

The parsing work is handled by the class MetaFile.
We only pay attention to a subset of the information in the
"meta" files.  Specifically:

'CWD'   to initialize our notion.

'C'     to track chdir(2) on a per process basis

'R'     files read are what we really care about.
        directories read, provide a clue to resolving
        subsequent relative paths.  That is if we cannot find
        them relative to 'cwd', we check relative to the last
        dir read.

'W'     files opened for write or read-write,
        for filemon V3 and earlier.

'E'     files executed.

'L'     files linked

'V'     the filemon version, this record is used as a clue
        that we have reached the interesting bit.

"""

"""
RCSid:
        $Id: meta2deps.py,v 1.18 2015/04/03 18:23:25 sjg Exp $

        Copyright (c) 2011-2013, Juniper Networks, Inc.
        All rights reserved.

        Redistribution and use in source and binary forms, with or without
        modification, are permitted provided that the following conditions
        are met:
        1. Redistributions of source code must retain the above copyright
           notice, this list of conditions and the following disclaimer.
        2. Redistributions in binary form must reproduce the above copyright
           notice, this list of conditions and the following disclaimer in the
           documentation and/or other materials provided with the distribution.

        THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
        "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
        LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
        A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
        OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
        SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
        LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
        DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
        THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
        (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
        OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""

import os, re, sys


def getv(dict, key, d=None):
    """Lookup key in dict and return value or the supplied default."""
    if key in dict:
        return dict[key]
    return d


def resolve(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Return an absolute path, resolving via cwd or last_dir if needed.

    Absolute paths (and './' or '.' relative to cwd) are returned
    without touching the file system.  Any other relative path is
    looked for under last_dir and then cwd; the first candidate that
    exists is returned, or None if neither exists.
    """
    if path.endswith('/.'):
        path = path[0:-2]
    if len(path) > 0 and path[0] == '/':
        return path
    if path == '.':
        return cwd
    if path.startswith('./'):
        return cwd + path[1:]
    if last_dir == cwd:
        # no point checking the same directory twice
        last_dir = None
    for d in [last_dir, cwd]:
        if not d:
            continue
        p = '/'.join([d, path])
        if debug > 2:
            print("looking for:", p, end=' ', file=debug_out)
        if not os.path.exists(p):
            if debug > 2:
                print("nope", file=debug_out)
            continue
        if debug > 2:
            print("found:", p, file=debug_out)
        return p
    return None


def abspath(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Return an absolute path, resolving via cwd or last_dir if needed.
    this gets called a lot, so we try to avoid calling realpath
    until we know we have something.
    """
    rpath = resolve(path, cwd, last_dir, debug, debug_out)
    if rpath:
        path = rpath
    # only pay for realpath when the path is relative, contains
    # './' or a trailing '/..', or is itself a symlink
    if (path.find('/') < 0 or
        path.find('./') > 0 or
        path.endswith('/..') or
        os.path.islink(path)):
        return os.path.realpath(path)
    return path


def sort_unique(list, cmp=None, key=None, reverse=False):
    """Sort list in place and return a new list without duplicates.

    cmp     optional old-style comparison function; it is adapted
            with functools.cmp_to_key so that it works on both
            Python 2 and Python 3.

    key     optional key function, as for list.sort().

    reverse sort in descending order if true.

    Only adjacent equal entries are dropped, which after sorting
    means every duplicate.
    """
    if cmp is not None:
        from functools import cmp_to_key
        cmp_key = cmp_to_key(cmp)
        if key is None:
            sort_key = cmp_key
        else:
            # Python 2 applied cmp to the keys, so do the same
            user_key = key
            sort_key = lambda item: cmp_key(user_key(item))
    else:
        sort_key = key
    list.sort(key=sort_key, reverse=reverse)
    nl = []
    for e in list:
        if nl and e == nl[-1]:
            continue
        nl.append(e)
    return nl


def add_trims(x):
    """Return the suffix variants of x that we trim from an objroot."""
    return ['/' + x + '/',
            '/' + x,
            x + '/',
            x]


class MetaFile:
    """class to parse meta files generated by bmake."""

    # NOTE: these are class attributes on purpose.  The mutable ones
    # are only ever modified in place (or replaced by reset() on one
    # instance), so by default every instance shares them; main()
    # creates one instance per meta file and reports the dependencies
    # accumulated by all of them from the last instance.
    conf = None
    dirdep_re = None
    host_target = None
    srctops = []
    objroots = []
    excludes = []
    seen = {}
    obj_deps = []
    src_deps = []
    file_deps = []

    def __init__(self, name, conf=None):
        """if name is set we will parse it now.
        conf can have the following keys:

        SRCTOPS list of tops of the src tree(s).

        CURDIR  the src directory 'bmake' was run from.

        RELDIR  the relative path from SRCTOP to CURDIR

        MACHINE the machine we built for.
                set to 'none' if we are not cross-building.
                More specifically if machine cannot be deduced from objdirs.

        MACHINE_ARCH
                recorded as self.machine_arch.

        TARGET_SPEC
                Sometimes MACHINE isn't enough.

        HOST_TARGET
                when we build for the pseudo machine 'host'
                the object tree uses HOST_TARGET rather than MACHINE.

        OBJROOTS a list of the common prefix for all obj dirs it might
                end in '/' or '-'.

        DPDEPS  names an optional file to which per file dependencies
                will be appended.
                For example if 'some/path/foo.h' is read from SRCTOP
                then 'DPDEPS_some/path/foo.h +=' "RELDIR" is output.
                This can allow 'bmake' to learn all the dirs within
                the tree that depend on 'foo.h'

        EXCLUDES
                A list of paths to ignore.
                ccache(1) can otherwise be trouble.

        debug   desired debug level

        debug_out open file to send debug output to (sys.stderr)

        """

        if conf is None:
            conf = {}

        self.name = name
        self.debug = getv(conf, 'debug', 0)
        self.debug_out = getv(conf, 'debug_out', sys.stderr)

        self.machine = getv(conf, 'MACHINE', '')
        self.machine_arch = getv(conf, 'MACHINE_ARCH', '')
        self.target_spec = getv(conf, 'TARGET_SPEC', '')
        self.curdir = getv(conf, 'CURDIR')
        self.reldir = getv(conf, 'RELDIR')
        self.dpdeps = getv(conf, 'DPDEPS')
        self.line = 0

        if not self.conf:
            # some of the steps below we want to do only once
            # NOTE: self.conf is assigned on the instance, the class
            # attribute stays None, so this block actually runs for
            # every new instance.  That is harmless (additions to the
            # shared lists are de-duplicated) and necessary, since
            # excludes and dirdep_re are set per instance here.
            self.conf = conf
            self.host_target = getv(conf, 'HOST_TARGET')
            for srctop in getv(conf, 'SRCTOPS', []):
                if not srctop:
                    continue    # an empty entry cannot be a tree top
                if not srctop.endswith('/'):
                    srctop += '/'
                if srctop not in self.srctops:
                    self.srctops.append(srctop)
                _srctop = os.path.realpath(srctop)
                if not _srctop.endswith('/'):
                    _srctop += '/'
                if _srctop not in self.srctops:
                    self.srctops.append(_srctop)

            trim_list = add_trims(self.machine)
            if self.machine == 'host' and self.host_target:
                trim_list += add_trims(self.host_target)
            if self.target_spec:
                trim_list += add_trims(self.target_spec)

            for objroot in getv(conf, 'OBJROOTS', []):
                for e in trim_list:
                    # an empty suffix (MACHINE not set) matches
                    # everything and objroot[0:-0] would wipe objroot
                    if e and objroot.endswith(e):
                        # this is not what we want - fix it
                        objroot = objroot[0:-len(e)]
                        if e.endswith('/'):
                            objroot += '/'
                if not objroot:
                    continue    # nothing left to match against
                if objroot not in self.objroots:
                    self.objroots.append(objroot)
                    _objroot = os.path.realpath(objroot)
                    # realpath drops a trailing '/', put it back
                    if objroot.endswith('/'):
                        _objroot += '/'
                    if _objroot not in self.objroots:
                        self.objroots.append(_objroot)

            # we want the longest match
            self.srctops.sort(reverse=True)
            self.objroots.sort(reverse=True)

            self.excludes = getv(conf, 'EXCLUDES', [])

            if self.debug:
                print("host_target=", self.host_target, file=self.debug_out)
                print("srctops=", self.srctops, file=self.debug_out)
                print("objroots=", self.objroots, file=self.debug_out)
                print("excludes=", self.excludes, file=self.debug_out)

            self.dirdep_re = re.compile(r'([^/]+)/(.+)')

        if self.dpdeps and not self.reldir:
            if self.debug:
                print("need reldir:", end=' ', file=self.debug_out)
            if self.curdir:
                srctop = self.find_top(self.curdir, self.srctops)
                if srctop:
                    self.reldir = self.curdir.replace(srctop,'')
                    if self.debug:
                        print(self.reldir, file=self.debug_out)
            if not self.reldir:
                self.dpdeps = None      # we cannot do it?

        self.cwd = os.getcwd()          # make sure this is initialized
        self.last_dir = self.cwd

        if name:
            self.try_parse()

    def reset(self):
        """reset state if we are being passed meta files from multiple directories."""
        self.seen = {}
        self.obj_deps = []
        self.src_deps = []
        self.file_deps = []

    def dirdeps(self, sep='\n'):
        """return DIRDEPS"""
        return sep.strip() + sep.join(self.obj_deps)

    def src_dirdeps(self, sep='\n'):
        """return SRC_DIRDEPS"""
        return sep.strip() + sep.join(self.src_deps)

    def file_depends(self, out=None):
        """Append DPDEPS_${file} += ${RELDIR}
        for each file we saw, to the output file.

        out     open (text mode) file to print to;
                None means sys.stdout.

        Does nothing if we do not know RELDIR.
        """
        if not self.reldir:
            return None
        for f in sort_unique(self.file_deps):
            print('DPDEPS_%s += %s' % (f, self.reldir), file=out)

    def seenit(self, dir):
        """remember that we have seen dir."""
        self.seen[dir] = 1

    def add(self, list, data, clue=''):
        """add data to list if it isn't already there."""
        if data not in list:
            list.append(data)
            if self.debug:
                print("%s: %sAdd: %s" % (self.name, clue, data), file=self.debug_out)

    def find_top(self, path, list):
        """the logical tree may be split across multiple trees

        Return the first entry of list that path starts with,
        or None if there is no such entry.
        """
        for top in list:
            if path.startswith(top):
                if self.debug > 2:
                    print("found in", top, file=self.debug_out)
                return top
        return None

    def find_obj(self, objroot, dir, path, input):
        """return path within objroot, taking care of .dirdep files

        objroot the entry of self.objroots that dir starts with.

        dir     the directory holding path.

        path    the full path of the file we are looking at.

        input   the raw path from the meta file, remembered as seen
                when there is no .dirdep file to consult.

        Returns the DIRDEPS entry, or None if we cannot work one out.
        """
        ddep = None
        for ddepf in [path + '.dirdep', dir + '/.dirdep']:
            if not ddep and os.path.exists(ddepf):
                with open(ddepf, 'r') as f:
                    ddep = f.readline().strip('# \n')
                if self.debug > 1:
                    print("found %s: %s\n" % (ddepf, ddep), file=self.debug_out)
                # drop a trailing ".machine" (or ".target_spec");
                # an empty machine must not match or we would chop
                # the last character off
                if self.machine and ddep.endswith(self.machine):
                    ddep = ddep[0:-(1+len(self.machine))]
                elif self.target_spec and ddep.endswith(self.target_spec):
                    ddep = ddep[0:-(1+len(self.target_spec))]

        if not ddep:
            # no .dirdeps, so remember that we've seen the raw input
            self.seenit(input)
            self.seenit(dir)
            if self.machine == 'none':
                if dir.startswith(objroot):
                    return dir.replace(objroot,'')
                return None
            # what is left after objroot should be machine/reldir
            m = self.dirdep_re.match(dir.replace(objroot,''))
            if m:
                ddep = m.group(2)
                dmachine = m.group(1)
                if dmachine != self.machine:
                    if not (self.machine == 'host' and
                            dmachine == self.host_target):
                        if self.debug > 2:
                            print("adding .%s to %s" % (dmachine, ddep), file=self.debug_out)
                        ddep += '.' + dmachine

        return ddep

    def try_parse(self, name=None, file=None):
        """give file and line number causing exception"""
        try:
            self.parse(name, file)
        except Exception:
            # give a useful clue
            print('{}:{}: '.format(self.name, self.line), end=' ', file=sys.stderr)
            raise

    def parse(self, name=None, file=None):
        """A meta file looks like:

        # Meta data file "path"
        CMD "command-line"
        CWD "cwd"
        TARGET "target"
        -- command output --
        -- filemon acquired metadata --
        # buildmon version 3
        V 3
        C "pid" "cwd"
        E "pid" "path"
        F "pid" "child"
        R "pid" "path"
        W "pid" "path"
        X "pid" "status"
        D "pid" "path"
        L "pid" "src" "target"
        M "pid" "old" "new"
        S "pid" "path"
        # Bye bye

        We go to some effort to avoid processing a dependency more than once.
        Of the above record types only C,E,F,L,R,V and W are of interest.

        name    if set, replaces self.name.

        file    if set, an already open file (or any iterable of lines)
                to read instead of opening self.name; we do not close it.
        """

        version = 0                     # unknown
        if name:
            self.name = name
        self.line = 0
        cwd = self.cwd
        if file:
            f = file
            cwd = self.last_dir = self.cwd
        else:
            f = open(self.name, 'r')
        try:
            skip = True
            pid_cwd = {}
            pid_last_dir = {}
            last_pid = 0

            if self.curdir:
                self.seenit(self.curdir)    # we ignore this

            # NOTE: 'W' is not in this list, so 'W' records are
            # currently ignored (as are 'M' and 'S').
            interesting = 'CEFLRV'
            for line in f:
                self.line += 1
                # ignore anything we don't care about
                if not line[0] in interesting:
                    continue
                if self.debug > 2:
                    print("input:", line, end=' ', file=self.debug_out)
                w = line.split()

                if skip:
                    if w[0] == 'V':
                        skip = False
                        version = int(w[1])
                        # Disabled: for version < 4 we arguably cannot
                        # ignore 'W' records as they may be 'rw':
                        #     if version < 4:
                        #         interesting += 'W'
                    elif w[0] == 'CWD':
                        self.cwd = cwd = self.last_dir = w[1]
                        self.seenit(cwd)    # ignore this
                        if self.debug:
                            print("%s: CWD=%s" % (self.name, cwd), file=self.debug_out)
                    continue

                if len(w) < 3:
                    # every record we handle has a pid and an argument
                    if self.debug > 2:
                        print("short record ignored", file=self.debug_out)
                    continue

                pid = int(w[1])
                if pid != last_pid:
                    # switching process: save the state of the old one
                    # and pick up the state of the new one
                    if last_pid:
                        pid_cwd[last_pid] = cwd
                        pid_last_dir[last_pid] = self.last_dir
                    cwd = getv(pid_cwd, pid, self.cwd)
                    self.last_dir = getv(pid_last_dir, pid, self.cwd)
                    last_pid = pid

                # process operations
                if w[0] == 'F':
                    # the child inherits the cwd of its parent
                    npid = int(w[2])
                    pid_cwd[npid] = cwd
                    pid_last_dir[npid] = cwd
                    last_pid = npid
                    continue
                elif w[0] == 'C':
                    cwd = abspath(w[2], cwd, None, self.debug, self.debug_out)
                    if cwd.endswith('/.'):
                        cwd = cwd[0:-2]
                    self.last_dir = cwd
                    if self.debug > 1:
                        print("cwd=", cwd, file=self.debug_out)
                    continue

                if w[2] in self.seen:
                    if self.debug > 2:
                        print("seen:", w[2], file=self.debug_out)
                    continue
                # file operations
                if w[0] in 'ML':
                    # these are special, treat src as read and
                    # target as write.
                    # w[1] is the pid, so src is w[2] and target w[3]
                    self.parse_path(w[2].strip("'"), cwd, 'R', w)
                    if len(w) > 3:
                        self.parse_path(w[3].strip("'"), cwd, 'W', w)
                    continue
                elif w[0] in 'ERWS':
                    path = w[2]
                    self.parse_path(path, cwd, w[0], w)
        finally:
            # only close what we opened ourselves
            if not file:
                f.close()

    def parse_path(self, path, cwd, op=None, w=None):
        """look at a path for the op specified

        path    the path from the record.

        cwd     current directory of the process the record is for.

        op      the operation, defaults to w[0].

        w       the split record; w[2] is remembered as seen once
                we have dealt with it.
        """

        if w is None:
            w = []
        if not op:
            op = w[0]

        # we are never interested in .dirdep files as dependencies
        if path.endswith('.dirdep'):
            return
        for p in self.excludes:
            if p and path.startswith(p):
                if self.debug > 2:
                    print("exclude:", p, path, file=self.debug_out)
                return
        # we don't want to resolve the last component if it is
        # a symlink
        path = resolve(path, cwd, self.last_dir, self.debug, self.debug_out)
        if not path:
            return
        dir,base = os.path.split(path)
        if dir in self.seen:
            if self.debug > 2:
                print("seen:", dir, file=self.debug_out)
            return
        # we can have a path in an objdir which is a link
        # to the src dir, we may need to add dependencies for each
        rdir = dir
        dir = abspath(dir, cwd, self.last_dir, self.debug, self.debug_out)
        if rdir == dir or rdir.find('./') > 0:
            rdir = None
        # now put path back together
        path = '/'.join([dir,base])
        if self.debug > 1:
            print("raw=%s rdir=%s dir=%s path=%s" % (w[2], rdir, dir, path), file=self.debug_out)
        if op in 'RWS':
            if path in [self.last_dir, cwd, self.cwd, self.curdir]:
                if self.debug > 1:
                    print("skipping:", path, file=self.debug_out)
                return
            if os.path.isdir(path):
                if op in 'RW':
                    # a directory read is a clue for resolving
                    # subsequent relative paths
                    self.last_dir = path
                if self.debug > 1:
                    print("ldir=", self.last_dir, file=self.debug_out)
                return

        if op in 'ERW':
            # finally, we get down to it
            if dir == self.cwd or dir == self.curdir:
                return
            srctop = self.find_top(path, self.srctops)
            if srctop:
                if self.dpdeps:
                    self.add(self.file_deps, path.replace(srctop,''), 'file')
                self.add(self.src_deps, dir.replace(srctop,''), 'src')
                self.seenit(w[2])
                self.seenit(dir)
                if rdir and not rdir.startswith(srctop):
                    dir = rdir      # for below
                    rdir = None
                else:
                    return

            objroot = None
            # if nothing matches we are left with the last candidate,
            # which is what the original loop variable ended up as
            odir = rdir
            for candidate in [dir, rdir]:
                if not candidate:
                    continue
                objroot = self.find_top(candidate, self.objroots)
                if objroot:
                    odir = candidate
                    break
            if objroot:
                ddep = self.find_obj(objroot, odir, path, w[2])
                if ddep:
                    self.add(self.obj_deps, ddep, 'obj')
            else:
                # don't waste time looking again
                self.seenit(w[2])
                if odir:
                    self.seenit(odir)


def main(argv, klass=MetaFile, xopts='', xoptf=None):
    """Simple driver for class MetaFile.

    Usage:
        script [options] [key=value ...] "meta" ...

    Options and key=value pairs contribute to the
    dictionary passed to MetaFile.

    -S "SRCTOP"
                add "SRCTOP" to the "SRCTOPS" list.

    -C "CURDIR"

    -O "OBJROOT"
                add "OBJROOT" to the "OBJROOTS" list.

    -R "RELDIR"

    -m "MACHINE"

    -a "MACHINE_ARCH"

    -H "HOST_TARGET"

    -T "TARGET_SPEC"

    -D "DPDEPS"

    -X "EXCLUDE"
                add "EXCLUDE" to the "EXCLUDES" list.

    -q          do not print the results.

    -d          bumps debug level

    An argument of the form @"file" names a file listing the meta
    files to parse.

    klass       the class to instantiate for each meta file.

    xopts       extra getopt option letters; xoptf(o, a, conf) is
                called for any option we do not handle ourselves.

    Returns the last instance created, or None.
    """
    import getopt

    # import Psyco if we can
    # it can speed things up quite a bit
    have_psyco = 0
    try:
        import psyco
        psyco.full()
        have_psyco = 1
    except Exception:
        # purely an optional speed-up, carry on without it
        pass

    conf = {
        'SRCTOPS': [],
        'OBJROOTS': [],
        'EXCLUDES': [],
        }

    # defaults from the environment; each variable is optional and
    # independent of the others
    machine = os.environ.get('MACHINE')
    if machine:
        conf['MACHINE'] = machine
    machine_arch = os.environ.get('MACHINE_ARCH')
    if machine_arch:
        conf['MACHINE_ARCH'] = machine_arch
    srctop = os.environ.get('SB_SRC')
    if srctop:
        conf['SRCTOPS'].append(srctop)
    objroot = os.environ.get('SB_OBJROOT')
    if objroot:
        conf['OBJROOTS'].append(objroot)

    debug = 0
    output = True

    opts, args = getopt.getopt(argv[1:], 'a:dS:C:O:R:m:D:H:qT:X:' + xopts)
    for o, a in opts:
        if o == '-a':
            conf['MACHINE_ARCH'] = a
        elif o == '-d':
            debug += 1
        elif o == '-q':
            output = False
        elif o == '-H':
            conf['HOST_TARGET'] = a
        elif o == '-S':
            if a not in conf['SRCTOPS']:
                conf['SRCTOPS'].append(a)
        elif o == '-C':
            conf['CURDIR'] = a
        elif o == '-O':
            if a not in conf['OBJROOTS']:
                conf['OBJROOTS'].append(a)
        elif o == '-R':
            conf['RELDIR'] = a
        elif o == '-D':
            conf['DPDEPS'] = a
        elif o == '-m':
            conf['MACHINE'] = a
        elif o == '-T':
            conf['TARGET_SPEC'] = a
        elif o == '-X':
            if a not in conf['EXCLUDES']:
                conf['EXCLUDES'].append(a)
        elif xoptf:
            xoptf(o, a, conf)

    conf['debug'] = debug

    # get any var=val assignments
    eaten = []
    for a in args:
        if a.find('=') > 0:
            # only split on the first '=', the value may contain more
            k,v = a.split('=', 1)
            if k in ['SRCTOP','OBJROOT','SRCTOPS','OBJROOTS']:
                if k == 'SRCTOP':
                    k = 'SRCTOPS'
                elif k == 'OBJROOT':
                    k = 'OBJROOTS'
                if v not in conf[k]:
                    conf[k].append(v)
            else:
                conf[k] = v
            eaten.append(a)
            continue
        break

    for a in eaten:
        args.remove(a)

    debug_out = getv(conf, 'debug_out', sys.stderr)

    if debug:
        print("config:", file=debug_out)
        print("psyco=", have_psyco, file=debug_out)
        for k,v in conf.items():
            print("%s=%s" % (k,v), file=debug_out)

    m = None
    for a in args:
        if a.endswith('.meta'):
            if not os.path.exists(a):
                continue
            m = klass(a, conf)
        elif a.startswith('@'):
            # there can actually multiple files per line
            with open(a[1:]) as flist:
                for line in flist:
                    for f in line.strip().split():
                        if not os.path.exists(f):
                            continue
                        m = klass(f, conf)

    if output and m:
        print(m.dirdeps())

        print(m.src_dirdeps('\nsrc:'))

        dpdeps = getv(conf, 'DPDEPS')
        if dpdeps:
            # text mode: file_depends() prints str
            with open(dpdeps, 'w') as out:
                m.file_depends(out)

    return m


if __name__ == '__main__':
    try:
        main(sys.argv)
    except Exception:
        # yes, this goes to stdout
        print("ERROR: ", sys.exc_info()[1])
        raise