#!/usr/bin/env python

"""
This script parses each "meta" file and extracts the
information needed to deduce build and src dependencies.

It works much the same as the original shell script, but is
*much* more efficient.

The parsing work is handled by the class MetaFile.
We only pay attention to a subset of the information in the
"meta" files.  Specifically:

'CWD'   to initialize our notion.

'C'     to track chdir(2) on a per process basis

'R'     files read are what we really care about.
        directories read, provide a clue to resolving
        subsequent relative paths.  That is if we cannot find
        them relative to 'cwd', we check relative to the last
        dir read.

'W'     files opened for write or read-write,
        for filemon V3 and earlier.

'E'     files executed.

'L'     files linked

'V'     the filemon version, this record is used as a clue
        that we have reached the interesting bit.

"""

"""
RCSid:
        $Id: meta2deps.py,v 1.13 2013/05/11 05:16:26 sjg Exp $

        Copyright (c) 2011-2013, Juniper Networks, Inc.
        All rights reserved.

        Redistribution and use in source and binary forms, with or without
        modification, are permitted provided that the following conditions
        are met:
        1. Redistributions of source code must retain the above copyright
           notice, this list of conditions and the following disclaimer.
        2. Redistributions in binary form must reproduce the above copyright
           notice, this list of conditions and the following disclaimer in the
           documentation and/or other materials provided with the distribution.

        THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
        "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
        LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
        A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
        OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
        SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
        LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
        DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
        THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
        (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
        OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

"""

import os
import re
import sys


def _say(out, *args, **kw):
    """
    Write args to out, separated by spaces, like the print statement.

    The 'print >> file' statement syntax only exists in Python 2;
    going through write() keeps this script usable with both
    Python 2 and Python 3.

    out     open file to write to, None means sys.stdout.

    end     keyword only, the terminator to append (default newline).
    """
    if out is None:
        out = sys.stdout
    out.write(' '.join([str(a) for a in args]) + kw.get('end', '\n'))


def getv(dict, key, d=None):
    """Lookup key in dict and return value or the supplied default."""
    if key in dict:
        return dict[key]
    return d


def resolve(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Return an absolute path, resolving via cwd or last_dir if needed.

    Absolute paths and paths starting with './' are returned without
    checking that they exist.  Other relative paths are looked for
    under last_dir and then cwd; None is returned if neither exists.
    An empty path yields None.
    """
    if not path:
        return None
    if path.endswith('/.'):
        # stripping '/.' from '/.' itself must leave the root
        path = path[0:-2] or '/'
    if path.startswith('/'):
        return path
    if path == '.':
        return cwd
    if path.startswith('./'):
        return cwd + path[1:]
    if last_dir == cwd:
        last_dir = None
    for d in [last_dir, cwd]:
        if not d:
            continue
        p = '/'.join([d, path])
        if debug > 2:
            _say(debug_out, "looking for:", p, end=' ')
        if not os.path.exists(p):
            if debug > 2:
                _say(debug_out, "nope")
            continue
        if debug > 2:
            _say(debug_out, "found:", p)
        return p
    return None


def abspath(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
    """
    Return an absolute path, resolving via cwd or last_dir if needed.
    this gets called a lot, so we try to avoid calling realpath
    until we know we have something.
    """
    path = resolve(path, cwd, last_dir, debug, debug_out)
    if path and (path.find('./') > 0 or
                 path.endswith('/..') or
                 os.path.islink(path)):
        return os.path.realpath(path)
    return path


def sort_unique(list, cmp=None, key=None, reverse=False):
    """
    Sort list in place and return a new list without adjacent duplicates.

    cmp, key and reverse have the meaning they had for the Python 2
    list.sort(); cmp is applied to the key values when both are given.
    """
    if cmp is None:
        list.sort(key=key, reverse=reverse)
    else:
        # list.sort() no longer accepts cmp in Python 3
        import functools
        cmp_key = functools.cmp_to_key(cmp)
        if key is None:
            list.sort(key=cmp_key, reverse=reverse)
        else:
            list.sort(key=lambda e: cmp_key(key(e)), reverse=reverse)
    nl = []
    for e in list:
        # equal entries are adjacent once sorted
        if nl and e == nl[-1]:
            continue
        nl.append(e)
    return nl


def add_trims(x):
    """Return the variants of x (with and without '/') to trim from objroots."""
    return ['/' + x + '/',
            '/' + x,
            x + '/',
            x]


class MetaFile:
    """class to parse meta files generated by bmake."""

    # NOTE: these are class attributes, so the containers below are
    # shared by every instance until reset() is called.
    # main() relies on that: it creates one instance per meta file
    # and reports the dependencies collected by all of them.
    conf = None
    dirdep_re = None
    host_target = None
    srctops = []
    objroots = []

    seen = {}
    obj_deps = []
    src_deps = []
    file_deps = []

    def __init__(self, name, conf=None):
        """if name is set we will parse it now.
        conf can have the following keys:

        SRCTOPS list of tops of the src tree(s).

        CURDIR  the src directory 'bmake' was run from.

        RELDIR  the relative path from SRCTOP to CURDIR

        MACHINE the machine we built for.
                set to 'none' if we are not cross-building.
                More specifically if machine cannot be deduced from objdirs.

        TARGET_SPEC
                Sometimes MACHINE isn't enough.

        HOST_TARGET
                when we build for the pseudo machine 'host'
                the object tree uses HOST_TARGET rather than MACHINE.

        OBJROOTS a list of the common prefix for all obj dirs it might
                end in '/' or '-'.

        DPDEPS  names an optional file to which per file dependencies
                will be appended.
                For example if 'some/path/foo.h' is read from SRCTOP
                then 'DPDEPS_some/path/foo.h +=' "RELDIR" is output.
                This can allow 'bmake' to learn all the dirs within
                the tree that depend on 'foo.h'

        debug   desired debug level

        debug_out open file to send debug output to (sys.stderr)

        """

        if conf is None:
            conf = {}

        self.name = name
        self.debug = getv(conf, 'debug', 0)
        self.debug_out = getv(conf, 'debug_out', sys.stderr)

        self.machine = getv(conf, 'MACHINE', '')
        self.machine_arch = getv(conf, 'MACHINE_ARCH', '')
        self.target_spec = getv(conf, 'TARGET_SPEC', '')
        self.curdir = getv(conf, 'CURDIR')
        self.reldir = getv(conf, 'RELDIR')
        self.dpdeps = getv(conf, 'DPDEPS')

        if not self.conf:
            # some of the steps below we want to do only once
            self.conf = conf
            self.host_target = getv(conf, 'HOST_TARGET')
            for srctop in getv(conf, 'SRCTOPS', []):
                if not srctop:
                    continue            # '' would match every path
                if not srctop.endswith('/'):
                    srctop += '/'
                if srctop not in self.srctops:
                    self.srctops.append(srctop)
                    _srctop = os.path.realpath(srctop)
                    if not _srctop.endswith('/'):
                        _srctop += '/'
                    if _srctop not in self.srctops:
                        self.srctops.append(_srctop)

            trim_list = add_trims(self.machine)
            if self.machine == 'host' and self.host_target:
                trim_list += add_trims(self.host_target)
            if self.target_spec:
                trim_list += add_trims(self.target_spec)

            for objroot in getv(conf, 'OBJROOTS', []):
                for e in trim_list:
                    if objroot.endswith(e):
                        # this is not what we want - fix it
                        objroot = objroot[0:-len(e)]
                        if e.endswith('/'):
                            objroot += '/'
                if not objroot:
                    continue            # '' would match every path
                if objroot not in self.objroots:
                    self.objroots.append(objroot)
                    _objroot = os.path.realpath(objroot)
                    if objroot.endswith('/'):
                        _objroot += '/'
                    if _objroot not in self.objroots:
                        self.objroots.append(_objroot)

            # we want the longest match
            self.srctops.sort(reverse=True)
            self.objroots.sort(reverse=True)

            if self.debug:
                _say(self.debug_out, "host_target=", self.host_target)
                _say(self.debug_out, "srctops=", self.srctops)
                _say(self.debug_out, "objroots=", self.objroots)

            # splits 'machine/rest/of/path' into machine and path
            self.dirdep_re = re.compile(r'([^/]+)/(.+)')

        if self.dpdeps and not self.reldir:
            if self.debug:
                _say(self.debug_out, "need reldir:", end=' ')
            if self.curdir:
                srctop = self.find_top(self.curdir, self.srctops)
                if srctop:
                    # find_top() guarantees srctop is a prefix
                    self.reldir = self.curdir.replace(srctop, '', 1)
                    if self.debug:
                        _say(self.debug_out, self.reldir)
            if not self.reldir:
                self.dpdeps = None      # we cannot do it?

        self.cwd = os.getcwd()          # make sure this is initialized

        if name:
            self.parse()

    def reset(self):
        """reset state if we are being passed meta files from multiple directories."""
        self.seen = {}
        self.obj_deps = []
        self.src_deps = []
        self.file_deps = []

    def dirdeps(self, sep='\n'):
        """return DIRDEPS"""
        return sep.strip() + sep.join(self.obj_deps)

    def src_dirdeps(self, sep='\n'):
        """return SRC_DIRDEPS"""
        return sep.strip() + sep.join(self.src_deps)

    def file_depends(self, out=None):
        """Append DPDEPS_${file} += ${RELDIR}
        for each file we saw, to the output file.
        out=None means sys.stdout.
        Note that self.file_deps gets sorted in place."""
        if not self.reldir:
            return None
        for f in sort_unique(self.file_deps):
            _say(out, 'DPDEPS_%s += %s' % (f, self.reldir))

    def seenit(self, dir):
        """remember that we have seen dir."""
        self.seen[dir] = 1

    def add(self, list, data, clue=''):
        """add data to list if it isn't already there."""
        if data not in list:
            list.append(data)
            if self.debug:
                _say(self.debug_out,
                     "%s: %sAdd: %s" % (self.name, clue, data))

    def find_top(self, path, list):
        """the logical tree may be split across multiple trees
        return the first entry of list that path starts with, or None."""
        for top in list:
            if path.startswith(top):
                if self.debug > 2:
                    _say(self.debug_out, "found in", top)
                return top
        return None

    def find_obj(self, objroot, dir, path, input):
        """return path within objroot, taking care of .dirdep files"""
        ddep = None
        for ddepf in [path + '.dirdep', dir + '/.dirdep']:
            if not ddep and os.path.exists(ddepf):
                with open(ddepf, 'r') as fh:
                    ddep = fh.readline().strip('# \n')
                if self.debug > 1:
                    _say(self.debug_out, "found %s: %s\n" % (ddepf, ddep))
                # drop the machine (or target_spec) suffix and the
                # separator in front of it; endswith('') is always
                # true so an empty machine must not be tested.
                if self.machine and ddep.endswith(self.machine):
                    ddep = ddep[0:-(1+len(self.machine))]
                elif self.target_spec and ddep.endswith(self.target_spec):
                    ddep = ddep[0:-(1+len(self.target_spec))]

        if not ddep:
            # no .dirdeps, so remember that we've seen the raw input
            self.seenit(input)
            self.seenit(dir)
            if self.machine == 'none':
                if dir.startswith(objroot):
                    return dir.replace(objroot, '', 1)
                return None
            m = self.dirdep_re.match(dir.replace(objroot, '', 1))
            if m:
                ddep = m.group(2)
                dmachine = m.group(1)
                if dmachine != self.machine:
                    if not (self.machine == 'host' and
                            dmachine == self.host_target):
                        if self.debug > 2:
                            _say(self.debug_out,
                                 "adding .%s to %s" % (dmachine, ddep))
                        ddep += '.' + dmachine

        return ddep

    def parse(self, name=None, file=None):
        """A meta file looks like:

        # Meta data file "path"
        CMD "command-line"
        CWD "cwd"
        TARGET "target"
        -- command output --
        -- filemon acquired metadata --
        # buildmon version 3
        V 3
        C "pid" "cwd"
        E "pid" "path"
        F "pid" "child"
        R "pid" "path"
        W "pid" "path"
        X "pid" "status"
        D "pid" "path"
        L "pid" "src" "target"
        M "pid" "old" "new"
        S "pid" "path"
        # Bye bye

        We go to some effort to avoid processing a dependency more than once.
        Of the above record types only C,E,F,L,R,V and W are of interest.

        name    if set, replaces self.name

        file    if set, an open file (any iterable of lines) to read
                rather than opening self.name; it is not closed here.
        """

        version = 0                     # unknown
        if name:
            self.name = name
        if file:
            f = file
        else:
            f = open(self.name, 'r')
        # a 'CWD' record normally replaces these, but make sure
        # they are set even if the file does not have one.
        cwd = last_dir = self.cwd
        skip = True
        pid_cwd = {}
        pid_last_dir = {}
        last_pid = 0

        if self.curdir:
            self.seenit(self.curdir)    # we ignore this

        interesting = 'CEFLRV'
        try:
            for line in f:
                # ignore anything we don't care about
                if line[0] not in interesting:
                    continue
                if self.debug > 2:
                    _say(self.debug_out, "input:", line, end='')
                w = line.split()

                if skip:
                    if w[0] == 'V':
                        skip = False
                        version = int(w[1])
                        # disabled:
                        # if version < 4:
                        #     # we cannot ignore 'W' records
                        #     # as they may be 'rw'
                        #     interesting += 'W'
                    elif w[0] == 'CWD':
                        self.cwd = cwd = last_dir = w[1]
                        self.seenit(cwd)    # ignore this
                        if self.debug:
                            _say(self.debug_out,
                                 "%s: CWD=%s" % (self.name, cwd))
                    continue

                if len(w) < 3:
                    # every record we handle below has a pid and
                    # at least one more field
                    continue

                pid = int(w[1])
                if pid != last_pid:
                    # switching process: save state of the previous
                    # one and pick up what we know about this one
                    if last_pid:
                        pid_cwd[last_pid] = cwd
                        pid_last_dir[last_pid] = last_dir
                    cwd = getv(pid_cwd, pid, self.cwd)
                    last_dir = getv(pid_last_dir, pid, self.cwd)
                    last_pid = pid

                # process operations
                if w[0] == 'F':
                    # the child starts out in the cwd of its parent
                    npid = int(w[2])
                    pid_cwd[npid] = cwd
                    pid_last_dir[npid] = cwd
                    last_pid = npid
                    continue
                elif w[0] == 'C':
                    ncwd = abspath(w[2], cwd, None,
                                   self.debug, self.debug_out)
                    if not ncwd:
                        # could not resolve it, keep what we had
                        continue
                    cwd = ncwd
                    if cwd.endswith('/.'):
                        cwd = cwd[0:-2]
                    last_dir = cwd
                    if self.debug > 1:
                        _say(self.debug_out, "cwd=", cwd)
                    continue

                if w[2] in self.seen:
                    if self.debug > 2:
                        _say(self.debug_out, "seen:", w[2])
                    continue
                # file operations
                if w[0] in 'ML':
                    path = w[2].strip("'")
                else:
                    path = w[2]
                # we are never interested in .dirdep files as dependencies
                if path.endswith('.dirdep'):
                    continue
                # we don't want to resolve the last component if it is
                # a symlink
                path = resolve(path, cwd, last_dir,
                               self.debug, self.debug_out)
                if not path:
                    continue
                dir, base = os.path.split(path)
                if dir in self.seen:
                    if self.debug > 2:
                        _say(self.debug_out, "seen:", dir)
                    continue
                # we can have a path in an objdir which is a link
                # to the src dir, we may need to add dependencies for each
                rdir = dir
                dir = abspath(dir, cwd, last_dir, self.debug, self.debug_out)
                if rdir == dir or rdir.find('./') > 0:
                    rdir = None
                # now put path back together
                path = '/'.join([dir, base])
                if self.debug > 1:
                    _say(self.debug_out,
                         "raw=%s rdir=%s dir=%s path=%s" %
                         (w[2], rdir, dir, path))
                if w[0] in 'SRWL':
                    if w[0] == 'W' and path.endswith('.dirdep'):
                        continue
                    if path in [last_dir, cwd, self.cwd, self.curdir]:
                        if self.debug > 1:
                            _say(self.debug_out, "skipping:", path)
                        continue
                    if os.path.isdir(path):
                        if w[0] in 'RW':
                            last_dir = path
                        if self.debug > 1:
                            _say(self.debug_out, "ldir=", last_dir)
                        continue

                if w[0] in 'REWML':
                    # finally, we get down to it
                    if dir == self.cwd or dir == self.curdir:
                        continue
                    srctop = self.find_top(path, self.srctops)
                    if srctop:
                        # srctop is a prefix of path, only strip that
                        if self.dpdeps:
                            self.add(self.file_deps,
                                     path.replace(srctop, '', 1), 'file')
                        self.add(self.src_deps,
                                 dir.replace(srctop, '', 1), 'src')
                        self.seenit(w[2])
                        self.seenit(dir)
                        if rdir and not rdir.startswith(srctop):
                            dir = rdir      # for below
                            rdir = None
                        else:
                            continue

                    objroot = None
                    for dir in [dir, rdir]:
                        if not dir:
                            continue
                        objroot = self.find_top(dir, self.objroots)
                        if objroot:
                            break
                    if objroot:
                        ddep = self.find_obj(objroot, dir, path, w[2])
                        if ddep:
                            self.add(self.obj_deps, ddep, 'obj')
                    else:
                        # don't waste time looking again
                        self.seenit(w[2])
                        self.seenit(dir)
        finally:
            # only close what we opened, even if parsing failed
            if not file:
                f.close()


def main(argv, klass=MetaFile, xopts='', xoptf=None):
    """Simple driver for class MetaFile.

    Usage:
        script [options] [key=value ...] "meta" ...

    Options and key=value pairs contribute to the
    dictionary passed to MetaFile.

    -S "SRCTOP"
                add "SRCTOP" to the "SRCTOPS" list.

    -C "CURDIR"

    -O "OBJROOT"
                add "OBJROOT" to the "OBJROOTS" list.

    -m "MACHINE"

    -a "MACHINE_ARCH"

    -H "HOST_TARGET"

    -D "DPDEPS"

    -d  bumps debug level

    -R "RELDIR", -T "TARGET_SPEC" and -q (no output) are accepted too.

    xopts   extra getopt option letters, any option not handled
            here is passed to xoptf(option, value, conf).

    Returns the last klass instance created, or None if no
    "meta" file was named.
    """
    import getopt

    # import Psyco if we can
    # it can speed things up quite a bit
    have_psyco = 0
    try:
        import psyco
        psyco.full()
        have_psyco = 1
    except Exception:
        pass

    conf = {
        'SRCTOPS': [],
        'OBJROOTS': [],
        }

    # best effort: as before, the first variable that is not set
    # ends the use of the environment.
    try:
        machine = os.environ['MACHINE']
        if machine:
            conf['MACHINE'] = machine
        machine_arch = os.environ['MACHINE_ARCH']
        if machine_arch:
            conf['MACHINE_ARCH'] = machine_arch
        srctop = os.environ['SB_SRC']
        if srctop:
            conf['SRCTOPS'].append(srctop)
        objroot = os.environ['SB_OBJROOT']
        if objroot:
            conf['OBJROOTS'].append(objroot)
    except KeyError:
        pass

    debug = 0
    output = True

    opts, args = getopt.getopt(argv[1:], 'a:dS:C:O:R:m:D:H:qT:' + xopts)
    for o, a in opts:
        if o == '-a':
            conf['MACHINE_ARCH'] = a
        elif o == '-d':
            debug += 1
        elif o == '-q':
            output = False
        elif o == '-H':
            conf['HOST_TARGET'] = a
        elif o == '-S':
            if a not in conf['SRCTOPS']:
                conf['SRCTOPS'].append(a)
        elif o == '-C':
            conf['CURDIR'] = a
        elif o == '-O':
            if a not in conf['OBJROOTS']:
                conf['OBJROOTS'].append(a)
        elif o == '-R':
            conf['RELDIR'] = a
        elif o == '-D':
            conf['DPDEPS'] = a
        elif o == '-m':
            conf['MACHINE'] = a
        elif o == '-T':
            conf['TARGET_SPEC'] = a
        elif xoptf:
            xoptf(o, a, conf)

    conf['debug'] = debug

    # get any var=val assignments, they must precede the meta files
    eaten = 0
    for a in args:
        if a.find('=') <= 0:
            break
        # only split on the first '=', the value may contain more
        k, v = a.split('=', 1)
        if k in ['SRCTOP', 'OBJROOT', 'SRCTOPS', 'OBJROOTS']:
            if k == 'SRCTOP':
                k = 'SRCTOPS'
            elif k == 'OBJROOT':
                k = 'OBJROOTS'
            if v not in conf[k]:
                conf[k].append(v)
        else:
            conf[k] = v
        eaten += 1

    args = args[eaten:]

    debug_out = getv(conf, 'debug_out', sys.stderr)

    if debug:
        _say(debug_out, "config:")
        _say(debug_out, "psyco=", have_psyco)
        for k, v in conf.items():
            _say(debug_out, "%s=%s" % (k, v))

    # the dependency lists are class attributes of MetaFile, so the
    # last instance reports what all of them found.
    m = None
    for a in args:
        m = klass(a, conf)

    if output and m is not None:
        _say(sys.stdout, m.dirdeps())

        _say(sys.stdout, m.src_dirdeps('\nsrc:'))

        dpdeps = getv(conf, 'DPDEPS')
        if dpdeps:
            # NOTE(review): MetaFile.__init__ documents DPDEPS as being
            # appended to, but the file has always been truncated here;
            # that behavior is kept.
            with open(dpdeps, 'w') as out:
                m.file_depends(out)

    return m


if __name__ == '__main__':
    try:
        main(sys.argv)
    except:
        # yes, this goes to stdout
        _say(sys.stdout, "ERROR: ", sys.exc_info()[1])
        raise