#!/usr/bin/env python

"""
This script parses each "meta" file and extracts the
information needed to deduce build and src dependencies.

It works much the same as the original shell script, but is
*much* more efficient.

The parsing work is handled by the class MetaFile.
We only pay attention to a subset of the information in the
"meta" files.  Specifically:

'CWD'   to initialize our notion.

'C'     to track chdir(2) on a per process basis

'R'     files read are what we really care about.
        directories read, provide a clue to resolving
        subsequent relative paths.  That is if we cannot find
        them relative to 'cwd', we check relative to the last
        dir read.

'W'     files opened for write or read-write,
        for filemon V3 and earlier.

'E'     files executed.

'L'     files linked

'V'     the filemon version, this record is used as a clue
        that we have reached the interesting bit.

"""

from __future__ import print_function

"""
RCSid:
        $Id: meta2deps.py,v 1.17 2014/04/05 22:56:54 sjg Exp $

        Copyright (c) 2011-2013, Juniper Networks, Inc.
        All rights reserved.

        Redistribution and use in source and binary forms, with or without
        modification, are permitted provided that the following conditions
        are met:
        1. Redistributions of source code must retain the above copyright
           notice, this list of conditions and the following disclaimer.
        2. Redistributions in binary form must reproduce the above copyright
           notice, this list of conditions and the following disclaimer in the
           documentation and/or other materials provided with the distribution.

        THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
        "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
        LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
        A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
        OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
        SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
        LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
        DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
        THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
        (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
        OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""

import functools
import os
import re
import sys


def getv(dict, key, d=None):
    """Lookup key in dict and return value or the supplied default."""
    if key in dict:
        return dict[key]
    return d


def resolve(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Return an absolute path, resolving via cwd or last_dir if needed.

    A trailing '/.' is dropped first.  Absolute paths are returned
    as is, '.' becomes cwd and './x' becomes cwd + '/x'.  Any other
    relative path is looked for under last_dir and then cwd; the
    first candidate that exists is returned.

    Returns None if a relative path exists under neither directory.
    """
    if path.endswith('/.'):
        path = path[0:-2]
    if len(path) > 0 and path[0] == '/':
        return path
    if path == '.':
        return cwd
    if path.startswith('./'):
        return cwd + path[1:]
    if last_dir == cwd:
        # no point checking the same directory twice
        last_dir = None
    for d in [last_dir, cwd]:
        if not d:
            continue
        p = '/'.join([d, path])
        if debug > 2:
            print("looking for:", p, end=' ', file=debug_out)
        if not os.path.exists(p):
            if debug > 2:
                print("nope", file=debug_out)
            continue
        if debug > 2:
            print("found:", p, file=debug_out)
        return p
    return None


def abspath(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Return an absolute path, resolving via cwd or last_dir if needed.
    this gets called a lot, so we try to avoid calling realpath
    until we know we have something.

    If resolve() cannot find the path, the path is used as given.
    realpath is only applied when the result contains './' (other
    than at the very start), ends in '/..' or is a symlink.
    """
    rpath = resolve(path, cwd, last_dir, debug, debug_out)
    if rpath:
        path = rpath
    if (path.find('./') > 0 or
            path.endswith('/..') or
            os.path.islink(path)):
        return os.path.realpath(path)
    return path


def sort_unique(list, cmp=None, key=None, reverse=False):
    """
    Sort list in place and return a new list without duplicates.

    cmp is an optional old-style two argument comparison function,
    key and reverse are as for list.sort().  If both cmp and key
    are given, cmp compares the values returned by key.
    Elements are considered duplicates if they compare equal and
    end up adjacent after sorting.
    """
    if cmp is not None:
        # list.sort() lost its cmp argument in python3,
        # so express the comparison as a key function.
        cmp_key = functools.cmp_to_key(cmp)
        if key is None:
            key = cmp_key
        else:
            user_key = key
            key = lambda e: cmp_key(user_key(e))
    list.sort(key=key, reverse=reverse)
    nl = []
    previous = None
    for item in list:
        if nl and item == previous:
            continue
        # remember what we just kept, or we never skip anything
        previous = item
        nl.append(item)
    return nl


def add_trims(x):
    """Return the variants of x (with and without surrounding '/')
    that we trim from the end of an objroot."""
    return ['/' + x + '/',
            '/' + x,
            x + '/',
            x]


class MetaFile:
    """class to parse meta files generated by bmake."""

    # NOTE: these are class attributes.  The lists and the dict are
    # mutated in place (append/sort/item assignment), so they are
    # shared by, and accumulate across, all instances until reset()
    # gives an instance its own.
    conf = None
    dirdep_re = None
    host_target = None
    srctops = []
    objroots = []

    seen = {}
    obj_deps = []
    src_deps = []
    file_deps = []

    def __init__(self, name, conf=None):
        """if name is set we will parse it now.
        conf can have the follwing keys:

        SRCTOPS list of tops of the src tree(s).

        CURDIR  the src directory 'bmake' was run from.

        RELDIR  the relative path from SRCTOP to CURDIR

        MACHINE the machine we built for.
                set to 'none' if we are not cross-building.
                More specifically if machine cannot be deduced from objdirs.

        TARGET_SPEC
                Sometimes MACHINE isn't enough.

        HOST_TARGET
                when we build for the pseudo machine 'host'
                the object tree uses HOST_TARGET rather than MACHINE.

        OBJROOTS a list of the common prefix for all obj dirs it might
                end in '/' or '-'.

        DPDEPS  names an optional file to which per file dependencies
                will be appended.
                For example if 'some/path/foo.h' is read from SRCTOP
                then 'DPDEPS_some/path/foo.h +=' "RELDIR" is output.
                This can allow 'bmake' to learn all the dirs within
                the tree that depend on 'foo.h'

        debug   desired debug level

        debug_out open file to send debug output to (sys.stderr)

        """

        if conf is None:
            # avoid a mutable default argument
            conf = {}

        self.name = name
        self.debug = getv(conf, 'debug', 0)
        self.debug_out = getv(conf, 'debug_out', sys.stderr)

        self.machine = getv(conf, 'MACHINE', '')
        self.machine_arch = getv(conf, 'MACHINE_ARCH', '')
        self.target_spec = getv(conf, 'TARGET_SPEC', '')
        self.curdir = getv(conf, 'CURDIR')
        self.reldir = getv(conf, 'RELDIR')
        self.dpdeps = getv(conf, 'DPDEPS')
        self.line = 0

        if not self.conf:
            # some of the steps below we want to do only once
            # NOTE: the assignments below create instance attributes,
            # the class attribute 'conf' stays None, so this block is
            # run for each instance; the 'in' checks keep the shared
            # srctops/objroots lists free of duplicates.
            self.conf = conf
            self.host_target = getv(conf, 'HOST_TARGET')
            for srctop in getv(conf, 'SRCTOPS', []):
                if srctop[-1] != '/':
                    srctop += '/'
                if srctop not in self.srctops:
                    self.srctops.append(srctop)
                _srctop = os.path.realpath(srctop)
                if _srctop[-1] != '/':
                    _srctop += '/'
                if _srctop not in self.srctops:
                    self.srctops.append(_srctop)

            # An empty name would yield the trim '' which matches
            # every objroot and reduces it to nothing, and a missing
            # HOST_TARGET (None) cannot be concatenated at all,
            # so only names that are actually set contribute trims.
            trim_list = []
            if self.machine:
                trim_list += add_trims(self.machine)
            if self.machine == 'host' and self.host_target:
                trim_list += add_trims(self.host_target)
            if self.target_spec:
                trim_list += add_trims(self.target_spec)

            for objroot in getv(conf, 'OBJROOTS', []):
                for e in trim_list:
                    if objroot.endswith(e):
                        # this is not what we want - fix it
                        objroot = objroot[0:-len(e)]
                        if e.endswith('/'):
                            objroot += '/'
                if objroot not in self.objroots:
                    self.objroots.append(objroot)
                    _objroot = os.path.realpath(objroot)
                    # realpath drops a trailing '/', put it back
                    if objroot.endswith('/'):
                        _objroot += '/'
                    if _objroot not in self.objroots:
                        self.objroots.append(_objroot)

            # we want the longest match
            self.srctops.sort(reverse=True)
            self.objroots.sort(reverse=True)

            if self.debug:
                print("host_target=", self.host_target, file=self.debug_out)
                print("srctops=", self.srctops, file=self.debug_out)
                print("objroots=", self.objroots, file=self.debug_out)

            # splits 'machine/some/dir' into machine and dir
            self.dirdep_re = re.compile(r'([^/]+)/(.+)')

        if self.dpdeps and not self.reldir:
            if self.debug:
                print("need reldir:", end=' ', file=self.debug_out)
            if self.curdir:
                srctop = self.find_top(self.curdir, self.srctops)
                if srctop:
                    self.reldir = self.curdir.replace(srctop, '')
                    if self.debug:
                        print(self.reldir, file=self.debug_out)
            if not self.reldir:
                self.dpdeps = None      # we cannot do it?

        self.cwd = os.getcwd()          # make sure this is initialized

        if name:
            self.try_parse()

    def reset(self):
        """reset state if we are being passed meta files from multiple directories."""
        self.seen = {}
        self.obj_deps = []
        self.src_deps = []
        self.file_deps = []

    def dirdeps(self, sep='\n'):
        """return DIRDEPS"""
        return sep.strip() + sep.join(self.obj_deps)

    def src_dirdeps(self, sep='\n'):
        """return SRC_DIRDEPS"""
        return sep.strip() + sep.join(self.src_deps)

    def file_depends(self, out=None):
        """Append DPDEPS_${file} += ${RELDIR}
        for each file we saw, to the output file.

        out is an open text file, None means sys.stdout.
        Nothing is output if we do not know RELDIR."""
        if not self.reldir:
            return None
        for f in sort_unique(self.file_deps):
            print('DPDEPS_%s += %s' % (f, self.reldir), file=out)

    def seenit(self, dir):
        """rememer that we have seen dir."""
        self.seen[dir] = 1

    def add(self, list, data, clue=''):
        """add data to list if it isn't already there.

        clue is only used to label the debug output."""
        if data not in list:
            list.append(data)
            if self.debug:
                print("%s: %sAdd: %s" % (self.name, clue, data), file=self.debug_out)

    def find_top(self, path, list):
        """the logical tree may be split across multiple trees

        Return the first entry of list that path starts with,
        or None if there is no such entry."""
        for top in list:
            if path.startswith(top):
                if self.debug > 2:
                    print("found in", top, file=self.debug_out)
                return top
        return None

    def find_obj(self, objroot, dir, path, input):
        """return path within objroot, taking care of .dirdep files

        objroot is the entry of objroots that dir was found under,
        path is the file within dir and input is the raw path
        from the meta file.  May return None."""
        ddep = None
        for ddepf in [path + '.dirdep', dir + '/.dirdep']:
            if not ddep and os.path.exists(ddepf):
                with open(ddepf, 'r') as ddep_file:
                    ddep = ddep_file.readline().strip('# \n')
                if self.debug > 1:
                    print("found %s: %s\n" % (ddepf, ddep), file=self.debug_out)
                # strip a trailing '.machine' or '.target_spec';
                # an empty machine matches anything, so skip it.
                if self.machine and ddep.endswith(self.machine):
                    ddep = ddep[0:-(1+len(self.machine))]
                elif self.target_spec and ddep.endswith(self.target_spec):
                    ddep = ddep[0:-(1+len(self.target_spec))]

        if not ddep:
            # no .dirdeps, so remember that we've seen the raw input
            self.seenit(input)
            self.seenit(dir)
            if self.machine == 'none':
                if dir.startswith(objroot):
                    return dir.replace(objroot, '')
                return None
            m = self.dirdep_re.match(dir.replace(objroot, ''))
            if m:
                ddep = m.group(2)
                dmachine = m.group(1)
                if dmachine != self.machine:
                    if not (self.machine == 'host' and
                            dmachine == self.host_target):
                        if self.debug > 2:
                            print("adding .%s to %s" % (dmachine, ddep), file=self.debug_out)
                        ddep += '.' + dmachine

        return ddep

    def try_parse(self, name=None, file=None):
        """give file and line number causing exception"""
        try:
            self.parse(name, file)
        except BaseException:
            # give a useful clue, the exception itself is re-raised
            print('{}:{}: '.format(self.name, self.line), end=' ', file=sys.stderr)
            raise

    def parse(self, name=None, file=None):
        """A meta file looks like:

        # Meta data file "path"
        CMD "command-line"
        CWD "cwd"
        TARGET "target"
        -- command output --
        -- filemon acquired metadata --
        # buildmon version 3
        V 3
        C "pid" "cwd"
        E "pid" "path"
        F "pid" "child"
        R "pid" "path"
        W "pid" "path"
        X "pid" "status"
        D "pid" "path"
        L "pid" "src" "target"
        M "pid" "old" "new"
        S "pid" "path"
        # Bye bye

        We go to some effort to avoid processing a dependency more than once.
        Of the above record types only C,E,F,L,R,V and W are of interest.

        name replaces self.name if given.  file is an already open
        meta file; if it is not given we open (and close) self.name.
        """

        version = 0                     # unknown
        if name:
            self.name = name
        if file:
            f = file
        else:
            f = open(self.name, 'r')
        # until we see 'CWD' or the first pid
        cwd = last_dir = self.cwd
        skip = True
        pid_cwd = {}
        pid_last_dir = {}
        last_pid = 0

        self.line = 0
        if self.curdir:
            self.seenit(self.curdir)    # we ignore this

        interesting = 'CEFLRV'
        try:
            for line in f:
                self.line += 1
                # ignore anything we don't care about
                if not line[0] in interesting:
                    continue
                if self.debug > 2:
                    print("input:", line, end=' ', file=self.debug_out)
                w = line.split()

                if skip:
                    # nothing but 'CWD' matters until we see 'V'
                    if w[0] == 'V':
                        skip = False
                        # only needed by the disabled code below
                        version = int(w[1])
                        # disabled:
                        # if version < 4:
                        #     # we cannot ignore 'W' records
                        #     # as they may be 'rw'
                        #     interesting += 'W'
                    elif w[0] == 'CWD':
                        self.cwd = cwd = last_dir = w[1]
                        self.seenit(cwd)    # ignore this
                        if self.debug:
                            print("%s: CWD=%s" % (self.name, cwd), file=self.debug_out)
                    continue

                pid = int(w[1])
                if pid != last_pid:
                    # save the state of the previous process
                    # and pick up that of this one.
                    if last_pid:
                        pid_cwd[last_pid] = cwd
                        pid_last_dir[last_pid] = last_dir
                    cwd = getv(pid_cwd, pid, self.cwd)
                    last_dir = getv(pid_last_dir, pid, self.cwd)
                    last_pid = pid

                # process operations
                if w[0] == 'F':
                    # the child starts out in the cwd of its parent
                    npid = int(w[2])
                    pid_cwd[npid] = cwd
                    pid_last_dir[npid] = cwd
                    last_pid = npid
                    continue
                elif w[0] == 'C':
                    cwd = abspath(w[2], cwd, None, self.debug, self.debug_out)
                    if cwd.endswith('/.'):
                        cwd = cwd[0:-2]
                    last_dir = cwd
                    if self.debug > 1:
                        print("cwd=", cwd, file=self.debug_out)
                    continue

                if w[2] in self.seen:
                    if self.debug > 2:
                        print("seen:", w[2], file=self.debug_out)
                    continue
                # file operations
                if w[0] in 'ML':
                    path = w[2].strip("'")
                else:
                    path = w[2]
                # we are never interested in .dirdep files as dependencies
                if path.endswith('.dirdep'):
                    continue
                # we don't want to resolve the last component if it is
                # a symlink
                path = resolve(path, cwd, last_dir, self.debug, self.debug_out)
                if not path:
                    continue
                dir, base = os.path.split(path)
                if dir in self.seen:
                    if self.debug > 2:
                        print("seen:", dir, file=self.debug_out)
                    continue
                # we can have a path in an objdir which is a link
                # to the src dir, we may need to add dependencies for each
                rdir = dir
                dir = abspath(dir, cwd, last_dir, self.debug, self.debug_out)
                if rdir == dir or rdir.find('./') > 0:
                    rdir = None
                # now put path back together
                path = '/'.join([dir, base])
                if self.debug > 1:
                    print("raw=%s rdir=%s dir=%s path=%s" % (w[2], rdir, dir, path), file=self.debug_out)
                if w[0] in 'SRWL':
                    if w[0] == 'W' and path.endswith('.dirdep'):
                        continue
                    if path in [last_dir, cwd, self.cwd, self.curdir]:
                        if self.debug > 1:
                            print("skipping:", path, file=self.debug_out)
                        continue
                    if os.path.isdir(path):
                        if w[0] in 'RW':
                            last_dir = path
                        if self.debug > 1:
                            print("ldir=", last_dir, file=self.debug_out)
                        continue

                if w[0] in 'REWML':
                    # finally, we get down to it
                    if dir == self.cwd or dir == self.curdir:
                        continue
                    srctop = self.find_top(path, self.srctops)
                    if srctop:
                        if self.dpdeps:
                            self.add(self.file_deps, path.replace(srctop, ''), 'file')
                        self.add(self.src_deps, dir.replace(srctop, ''), 'src')
                        self.seenit(w[2])
                        self.seenit(dir)
                        if rdir and not rdir.startswith(srctop):
                            dir = rdir      # for below
                            rdir = None
                        else:
                            continue

                    objroot = None
                    # NOTE: this rebinds dir; if neither candidate is
                    # under an objroot, dir is left as rdir (maybe None)
                    for dir in [dir, rdir]:
                        if not dir:
                            continue
                        objroot = self.find_top(dir, self.objroots)
                        if objroot:
                            break
                    if objroot:
                        ddep = self.find_obj(objroot, dir, path, w[2])
                        if ddep:
                            self.add(self.obj_deps, ddep, 'obj')
                    else:
                        # don't waste time looking again
                        self.seenit(w[2])
                        self.seenit(dir)
        finally:
            # we only close what we opened, even if parsing failed
            if not file:
                f.close()


def main(argv, klass=MetaFile, xopts='', xoptf=None):
    """Simple driver for class MetaFile.

    Usage:
        script [options] [key=value ...] "meta" ...

    Options and key=value pairs contribute to the
    dictionary passed to MetaFile.

    -S "SRCTOP"
                add "SRCTOP" to the "SRCTOPS" list.

    -C "CURDIR"

    -O "OBJROOT"
                add "OBJROOT" to the "OBJROOTS" list.

    -m "MACHINE"

    -a "MACHINE_ARCH"

    -H "HOST_TARGET"

    -D "DPDEPS"

    -d          bumps debug level

    The options -R "RELDIR", -T "TARGET_SPEC" and -q (no output)
    are accepted as well.  xopts names additional options for
    getopt, these are passed to xoptf(option, value, conf).

    Returns the last klass instance created,
    or None if no meta files were given.
    """
    import getopt

    # import Psyco if we can
    # it can speed things up quite a bit
    have_psyco = 0
    try:
        import psyco
        psyco.full()
        have_psyco = 1
    except Exception:
        # this is purely optional
        pass

    conf = {
        'SRCTOPS': [],
        'OBJROOTS': [],
        }

    # Each variable is looked up on its own,
    # so that one which is not set does not hide the others.
    machine = os.environ.get('MACHINE')
    if machine:
        conf['MACHINE'] = machine
    machine_arch = os.environ.get('MACHINE_ARCH')
    if machine_arch:
        conf['MACHINE_ARCH'] = machine_arch
    srctop = os.environ.get('SB_SRC')
    if srctop:
        conf['SRCTOPS'].append(srctop)
    objroot = os.environ.get('SB_OBJROOT')
    if objroot:
        conf['OBJROOTS'].append(objroot)

    debug = 0
    output = True

    opts, args = getopt.getopt(argv[1:], 'a:dS:C:O:R:m:D:H:qT:' + xopts)
    for o, a in opts:
        if o == '-a':
            conf['MACHINE_ARCH'] = a
        elif o == '-d':
            debug += 1
        elif o == '-q':
            output = False
        elif o == '-H':
            conf['HOST_TARGET'] = a
        elif o == '-S':
            if a not in conf['SRCTOPS']:
                conf['SRCTOPS'].append(a)
        elif o == '-C':
            conf['CURDIR'] = a
        elif o == '-O':
            if a not in conf['OBJROOTS']:
                conf['OBJROOTS'].append(a)
        elif o == '-R':
            conf['RELDIR'] = a
        elif o == '-D':
            conf['DPDEPS'] = a
        elif o == '-m':
            conf['MACHINE'] = a
        elif o == '-T':
            conf['TARGET_SPEC'] = a
        elif xoptf:
            xoptf(o, a, conf)

    conf['debug'] = debug

    # get any var=val assignments
    # they must all precede the first meta file
    eaten = 0
    for a in args:
        if a.find('=') <= 0:
            break
        # only split on the first '=', the value may contain more
        k, v = a.split('=', 1)
        if k in ['SRCTOP', 'OBJROOT', 'SRCTOPS', 'OBJROOTS']:
            if k == 'SRCTOP':
                k = 'SRCTOPS'
            elif k == 'OBJROOT':
                k = 'OBJROOTS'
            if v not in conf[k]:
                conf[k].append(v)
        else:
            conf[k] = v
        eaten += 1

    args = args[eaten:]

    debug_out = getv(conf, 'debug_out', sys.stderr)

    if debug:
        print("config:", file=debug_out)
        print("psyco=", have_psyco, file=debug_out)
        for k, v in list(conf.items()):
            print("%s=%s" % (k, v), file=debug_out)

    m = None                    # we may not be given any meta files
    for a in args:
        if a.endswith('.meta'):
            m = klass(a, conf)
        elif a.startswith('@'):
            # there can actually multiple files per line
            with open(a[1:]) as list_file:
                for line in list_file:
                    for f in line.strip().split():
                        m = klass(f, conf)

    if output and m is not None:
        print(m.dirdeps())

        print(m.src_dirdeps('\nsrc:'))

        dpdeps = getv(conf, 'DPDEPS')
        if dpdeps:
            # text mode, since print() writes str
            with open(dpdeps, 'w') as dpdeps_file:
                m.file_depends(dpdeps_file)

    return m


if __name__ == '__main__':
    try:
        main(sys.argv)
    except BaseException:
        # yes, this goes to stdout
        print("ERROR: ", sys.exc_info()[1])
        raise